Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge branch 'main' into main
This commit is contained in commit 4278b183d0
18 changed files with 1000 additions and 102 deletions
.DS_Store (vendored)
Binary file not shown.
@@ -13,6 +13,9 @@ jobs:
       command: |
         python -m pip install --upgrade pip
         python -m pip install -r requirements.txt
+        pip install infisical
+        pip install pytest
+        pip install openai[datalib]

         # Run pytest and generate JUnit XML report
   - run:
README.md (40 lines changed)

@@ -3,18 +3,16 @@
 [](https://pypi.org/project/litellm/0.1.1/)
 [](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
 
 [](https://github.com/BerriAI/litellm)
 
-Get Support / Join the community 👉 [](https://discord.gg/wuPM9dRgDw)
+[](https://discord.gg/wuPM9dRgDw)
 
-a simple & light package to call OpenAI, Azure, Cohere, Anthropic API Endpoints
-
-litellm manages:
-
-- translating inputs to completion and embedding endpoints
-
-- guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']`
-
+a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
+- translating inputs to the provider's completion and embedding endpoints
+- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']`
+- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
 # usage
+Demo - https://litellm.ai/ \
 Read the docs - https://litellm.readthedocs.io/en/latest/
 
 ## quick start

@@ -25,11 +23,6 @@ pip install litellm
 ```python
 from litellm import completion
 
-## set ENV variables
-# ENV variables can be set in .env file, too. Example in .env.example
-os.environ["OPENAI_API_KEY"] = "openai key"
-os.environ["COHERE_API_KEY"] = "cohere key"
-
 messages = [{ "content": "Hello, how are you?","role": "user"}]
 
 # openai call

@@ -41,6 +34,9 @@ response = completion("command-nightly", messages)
 # azure openai call
 response = completion("chatgpt-test", messages, azure=True)
 
+# hugging face call
+response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)
+
 # openrouter call
 response = completion("google/palm-2-codechat-bison", messages)
 ```

@@ -53,17 +49,23 @@ pip install litellm==0.1.345
 
 ## Streaming Queries
 liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response.
+Streaming is supported for OpenAI, Azure, Anthropic models
 ```python
 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
+
+# claude 2
+result = completion('claude-2', messages, stream=True)
+for chunk in result:
+    print(chunk['choices'][0]['delta'])
 ```
 
-# hosted version
-- [Grab time if you want access 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+# support / talk with founders
+- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
+- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
+- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
 
 # why did we build this
 - **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
 
-# Support
-Contact us at ishaan@berri.ai / krrish@berri.ai
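One of the README additions above is exception mapping to the OpenAI exception types. A minimal sketch of what that enables for callers, where the chosen model and the failure scenarios are illustrative assumptions rather than part of this commit:

```python
# Illustrative only: because litellm maps provider errors to OpenAI error types,
# one except chain can cover OpenAI, Azure, Cohere and Anthropic calls alike.
from litellm import completion
from openai.error import AuthenticationError, RateLimitError, OpenAIError

messages = [{"role": "user", "content": "Hello, how are you?"}]

try:
    # hypothetical call; any supported provider/model could be used here
    response = completion(model="claude-instant-1", messages=messages)
    print(response['choices'][0]['message']['content'])
except AuthenticationError:
    print("bad or missing ANTHROPIC_API_KEY")   # surfaced as an OpenAI-style error
except RateLimitError:
    print("provider rate limit hit, retry later")
except OpenAIError as e:
    print(f"other provider error: {e}")
```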
New file (406 lines): a Colab notebook walking through liteLLM with Anthropic's Claude models.

Cell 1 - install liteLLM:

```python
!pip install litellm=="0.1.363"
```

Output (pip log, abridged): litellm-0.1.363 downloaded; openai<0.28.0,>=0.27.8, python-dotenv, tiktoken and their dependencies already satisfied; existing litellm 0.1.362 uninstalled; "Successfully installed litellm-0.1.363".

Cell 2 - import litellm & set env variables:

```python
import litellm
import os

os.environ["ANTHROPIC_API_KEY"] = " " #@param
```

Cell 3 - request Claude Instant-1 and Claude-2:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"}
]

result = litellm.completion('claude-instant-1', messages)
print("\n\n Result from claude-instant-1", result)
result = litellm.completion('claude-2', messages, max_tokens=5, temperature=0.2)
print("\n\n Result from claude-2", result)
```

Output: both calls return OpenAI-format dicts.

Result from claude-instant-1 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': " The Los Angeles Dodgers won the 2020 World Series, defeating the Tampa Bay Rays 4-2. It was the Dodgers' first World Series title since 1988."}}], 'created': 1691536677.2676156, 'model': 'claude-instant-1', 'usage': {'prompt_tokens': 30, 'completion_tokens': 32, 'total_tokens': 62}}

Result from claude-2 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ' The Los Angeles Dodgers won'}}], 'created': 1691536677.944753, 'model': 'claude-2', 'usage': {'prompt_tokens': 30, 'completion_tokens': 5, 'total_tokens': 35}}

Cell 4 - streaming example, request Claude-2:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "how does a court case get to the Supreme Court?"}
]

result = litellm.completion('claude-2', messages, stream=True)
for chunk in result:
    print(chunk['choices'][0]['delta'])
```

Output: the response is streamed one delta at a time; joined together it reads:

Here's a quick overview of how a court case can reach the U.S. Supreme Court:

- The case must first be heard in a lower trial court (either a state court or federal district court). The trial court makes initial rulings and produces a record of the case.

- The losing party can appeal the decision to an appeals court (a state appeals court for state cases, or a federal circuit court for federal cases). The appeals court reviews the trial court's rulings and can affirm, reverse, or modify the decision.

- If a party is still unsatisfied after the appeals court rules, they can petition the Supreme Court to hear the case through a writ of certiorari.

- The Supreme Court gets thousands of cert petitions every year but usually only agrees to hear about 100-150 of cases that have significant national importance or where lower courts disagree on federal law.

- If 4 out of the 9 Justices vote to grant cert (agree to hear the case), it goes on the Supreme Court's docket for arguments.

- The Supreme Court then hears oral arguments, considers written briefs, examines the lower court records, and issues a final ruling on the case, which serves as binding precedent
cookbook/liteLLM_Hugging_Face_Example.ipynb (new file, 153 lines)

A Colab notebook showing liteLLM calling Hugging Face Inference API models alongside OpenAI.

Markdown cell:

## Install liteLLM https://github.com/BerriAI/litellm
liteLLM provides one interface to call gpt 3.5, hugging face inference endpoints

Cell 1 - install:

```python
!pip install litellm=="0.1.362"
```

Output (pip log, abridged): litellm 0.1.362 and its dependencies (openai, python-dotenv, tiktoken, requests, aiohttp, ...) already satisfied.

Cell 2 - call Hugging Face and OpenAI models:

```python
from litellm import completion
import os

user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}]

os.environ['HF_TOKEN'] = "" #@param
# get your hugging face token from here:
# https://huggingface.co/settings/tokens

# Optional if you want to run OpenAI TOO
os.environ['OPENAI_API_KEY'] = "" #@param

response = completion("stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)
print("Response from stabilityai/stablecode-completion-alpha-3b-4k")
print(response['choices'][0]['message']['content'])
print("\n\n")

response = completion("bigcode/starcoder", messages=messages, hugging_face=True)
print("Response from bigcode/starcoder")
print(response['choices'][0]['message']['content'])
print("\n\n")

response = completion("google/flan-t5-xxl", messages=messages, hugging_face=True)
print("Response from google/flan-t5-xxl")
print(response['choices'][0]['message']['content'])
print("\n\n")

response = completion("google/flan-t5-large", messages=messages, hugging_face=True)
print("Response from google/flan-t5-large")
print(response['choices'][0]['message']['content'])
print("\n\n")

response = completion(model="gpt-3.5-turbo", messages=messages)
print(response['choices'][0]['message']['content'])
print(response)
```

Output (abridged): the code-completion models echo and continue the prompt ('Hello, whats the weather in San Francisco??", "id": 1, ...'), flan-t5-xxl answers "a little cold", flan-t5-large answers "cool", and gpt-3.5-turbo replies "I'm sorry, but I am an AI language model and do not have real-time data. However, you can check the weather in San Francisco by searching for \"San Francisco weather\" on a search engine or checking a reliable weather website or app."
@@ -34,5 +34,26 @@
 | Model Name       | Function Call                              | Required OS Variables                |
 |------------------|--------------------------------------------|--------------------------------------|
 | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']`    |
-| claude-v2        | `completion('claude-v2', messages)`        | `os.environ['ANTHROPIC_API_KEY']`    |
+| claude-2         | `completion('claude-2', messages)`         | `os.environ['ANTHROPIC_API_KEY']`    |
+
+### Hugging Face Inference API
+
+All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps:
+
+* Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call.
+* Set the `hugging_face` parameter to `True`.
+* Make sure to set the hugging face API key
+
+Here are some examples of supported models:
+**Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.**
+
+| Model Name | Function Call | Required OS Variables |
+|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
+| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` |
+| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` |
+| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` |
+| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` |
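Taken together, the steps above amount to the following minimal flow; the chosen repo id and prompt are examples drawn from the table, and the token value is a placeholder:

```python
import os
from litellm import completion

# the Hugging Face Inference API token the docs ask you to set
os.environ['HF_TOKEN'] = "hf_..."  # placeholder, use your own token

messages = [{"role": "user", "content": "def fibonacci(n):"}]

# any text-generation / text2text-generation repo id can be passed as `model`
response = completion(model="bigcode/starcoder", messages=messages, hugging_face=True)
print(response['choices'][0]['message']['content'])
```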
docs/token_usage.md (new file, 45 lines)

# Token Usage
By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/))

However, we also expose 3 public helper functions to calculate token usage across providers:

- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.

- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It uses our model_cost map, which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).

- `completion_cost`: This returns the overall cost (in USD) for a given LLM API call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both the input and the output).

## Example Usage

1. `token_counter`

```python
from litellm import token_counter

text = "Hey, how's it going"
print(token_counter(model="gpt-3.5-turbo", text=text))
```

2. `cost_per_token`

```python
from litellm import cost_per_token

prompt_tokens = 5
completion_tokens = 10
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)

print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar)
```

3. `completion_cost`

```python
from litellm import completion_cost

prompt = "Hey, how's it going"
completion = "Hi, I'm gpt - I am doing well"
cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)

print(cost_of_query)
```
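Since every completion response already carries a `usage` block (as the first line of this doc notes), the helpers compose naturally with a normal completion call; a small sketch, with the model and prompt chosen only for illustration:

```python
from litellm import completion, cost_per_token

messages = [{"role": "user", "content": "Hey, how's it going"}]
response = completion(model="gpt-3.5-turbo", messages=messages)

# read the OpenAI-format usage block returned with the response
usage = response['usage']
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=usage['prompt_tokens'],
    completion_tokens=usage['completion_tokens'],
)
print(f"request cost: ${prompt_cost + completion_cost_usd:.6f}")
```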
litellm/__init__.py

@@ -4,13 +4,34 @@ failure_callback = []
 set_verbose=False
 telemetry=True
 max_tokens = 256 # OpenAI Defaults
-retry = True # control tenacity retries.
+retry = True
 openai_key = None
 azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
 openrouter_key = None
+
+hugging_api_token = None
+
+model_cost = {
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+}
+
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
     def __init__(self):

@@ -83,7 +104,7 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
 from .main import *  # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
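The new `model_cost` map doubles as a context-window and pricing lookup. A small sketch of reading it directly as a module attribute, using values that appear in the map above:

```python
import litellm

# look up context window and per-token pricing straight from the map added above
info = litellm.model_cost["claude-2"]
print(info["max_tokens"])             # 100000
print(info["input_cost_per_token"])   # 0.00001102
print(info["output_cost_per_token"])  # 0.00003268
```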
litellm/integrations/helicone.py

@@ -2,7 +2,6 @@
 # On success, logs events to Helicone
 import dotenv, os
 import requests
-from anthropic import HUMAN_PROMPT, AI_PROMPT
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 class HeliconeLogger:

@@ -14,6 +13,7 @@ class HeliconeLogger:
         self.key = os.getenv('HELICONE_API_KEY')
 
     def claude_mapping(self, model, messages, response_obj):
+        from anthropic import HUMAN_PROMPT, AI_PROMPT
         prompt = f"{HUMAN_PROMPT}"
         for message in messages:
             if "role" in message:
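The Helicone logger above is wired in through litellm's callback lists (`failure_callback = []` appears at the top of `__init__.py` in the previous hunk). A hedged sketch of how a caller might opt in; the callback-by-name string and the key value are assumptions, not shown in this diff:

```python
import os
import litellm
from litellm import completion

# shown in the diff: HeliconeLogger reads this env var at init time
os.environ["HELICONE_API_KEY"] = "..."  # placeholder

# assumption: success logging is enabled by naming the integration on the callback list
litellm.success_callback = ["helicone"]

response = completion(model="gpt-3.5-turbo",
                      messages=[{"role": "user", "content": "Hello, how are you?"}])
```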
litellm/main.py

@@ -1,6 +1,5 @@
-import os, openai, cohere, replicate, sys
+import os, openai, sys
 from typing import Any
-from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
 from functools import partial
 import dotenv, traceback, random, asyncio, time
 from copy import deepcopy

@@ -8,15 +7,9 @@ import litellm
 from litellm import client, logging, exception_type, timeout, get_optional_params
 import tiktoken
 encoding = tiktoken.get_encoding("cl100k_base")
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-) # for exponential backoff
-from litellm.utils import get_secret
+from litellm.utils import get_secret, install_and_import, CustomStreamWrapper
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 
 new_response = {
     "choices": [
         {

@@ -28,9 +21,7 @@ new_response = {
         }
     ]
 }
-# TODO move this to utils.py
 # TODO add translations
-# TODO see if this worked - model_name == krrish
 ####### COMPLETION ENDPOINTS ################
 #############################################
 async def acompletion(*args, **kwargs):

@@ -52,7 +43,8 @@ def completion(
     temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
     presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
     # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False
+    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False,
+    hugging_face=False, replicate=False,
   ):
   try:
     global new_response

@@ -61,13 +53,16 @@ def completion(
       optional_params = get_optional_params(
         functions=functions, function_call=function_call,
         temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
-        presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id
+        presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id,
+        # params to identify the model
+        model=model, replicate=replicate, hugging_face=hugging_face
       )
       if azure == True:
         # azure configs
         openai.api_type = "azure"
         openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE")
         openai.api_version = litellm.api_version if litellm.api_version is not None else get_secret("AZURE_API_VERSION")
+        # set key
         if api_key:
             openai.api_key = api_key
         elif litellm.azure_key:

@@ -92,6 +87,7 @@ def completion(
         )
       elif model in litellm.open_ai_chat_completion_models:
         openai.api_type = "openai"
+        # note: if a user sets a custom base - we should ensure this works
         openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1"
         openai.api_version = None
         if litellm.organization:

@@ -154,7 +150,10 @@ def completion(
         model_response["model"] = model
         model_response["usage"] = response["usage"]
         response = model_response
-      elif "replicate" in model:
+      elif "replicate" in model or replicate == True:
+        # import replicate/if it fails then pip install replicate
+        install_and_import("replicate")
+        import replicate
         # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
         # checking in case user set it to REPLICATE_API_KEY instead
         if not get_secret("REPLICATE_API_TOKEN") and get_secret("REPLICATE_API_KEY"):

@@ -175,6 +174,11 @@ def completion(
         output = replicate.run(
           model,
           input=input)
+        if 'stream' in optional_params and optional_params['stream'] == True:
+          # don't try to access stream object,
+          # let the stream handler know this is replicate
+          response = CustomStreamWrapper(output, "replicate")
+          return response
         response = ""
         for item in output:
           response += item

@@ -194,6 +198,10 @@ def completion(
         }
         response = model_response
       elif model in litellm.anthropic_models:
+        # import anthropic/if it fails then pip install anthropic
+        install_and_import("anthropic")
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+
         #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
         if api_key:
             os.environ["ANTHROPIC_API_KEY"] = api_key

@@ -220,8 +228,14 @@ def completion(
         completion = anthropic.completions.create(
             model=model,
             prompt=prompt,
-            max_tokens_to_sample=max_tokens_to_sample
+            max_tokens_to_sample=max_tokens_to_sample,
+            **optional_params
         )
+        if 'stream' in optional_params and optional_params['stream'] == True:
+          # don't try to access stream object,
+          response = CustomStreamWrapper(completion, model)
+          return response
+
         completion_response = completion.completion
         ## LOGGING
         logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)

@@ -274,6 +288,9 @@ def completion(
             **optional_params
         )
       elif model in litellm.cohere_models:
+        # import cohere/if it fails then pip install cohere
+        install_and_import("cohere")
+        import cohere
         if api_key:
             cohere_key = api_key
         elif litellm.cohere_key:

@@ -287,8 +304,14 @@ def completion(
         ## COMPLETION CALL
         response = co.generate(
             model=model,
-            prompt = prompt
+            prompt = prompt,
+            **optional_params
         )
+        if 'stream' in optional_params and optional_params['stream'] == True:
+          # don't try to access stream object,
+          response = CustomStreamWrapper(response, model)
+          return response
+
         completion_response = response[0].text
         ## LOGGING
         logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)

@@ -304,6 +327,33 @@ def completion(
           "total_tokens": prompt_tokens + completion_tokens
         }
         response = model_response
+      elif hugging_face == True:
+        import requests
+        API_URL = f"https://api-inference.huggingface.co/models/{model}"
+        HF_TOKEN = get_secret("HF_TOKEN")
+        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+        prompt = " ".join([message["content"] for message in messages])
+        ## LOGGING
+        logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
+        input_payload = {"inputs": prompt}
+        response = requests.post(API_URL, headers=headers, json=input_payload)
+
+        completion_response = response.json()[0]['generated_text']
+        ## LOGGING
+        logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+        prompt_tokens = len(encoding.encode(prompt))
+        completion_tokens = len(encoding.encode(completion_response))
+        ## RESPONSE OBJECT
+        model_response["choices"][0]["message"]["content"] = completion_response
+        model_response["created"] = time.time()
+        model_response["model"] = model
+        model_response["usage"] = {
+          "prompt_tokens": prompt_tokens,
+          "completion_tokens": completion_tokens,
+          "total_tokens": prompt_tokens + completion_tokens
+        }
+        response = model_response
       else:
         ## LOGGING
         logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
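The streaming branches above hand the raw provider stream to `CustomStreamWrapper`, which this commit imports from `litellm.utils` but whose body is not part of the visible diff. A rough sketch of what such a wrapper has to do to satisfy the `chunk['choices'][0]['delta']` contract used in the README and tests; this is an illustration under assumptions about the provider chunk shapes, not the actual litellm implementation:

```python
class StreamWrapperSketch:
    """Illustrative only: adapts provider stream chunks to OpenAI-style deltas."""

    def __init__(self, completion_stream, model):
        self.completion_stream = completion_stream
        self.model = model

    def __iter__(self):
        return self

    def __next__(self):
        chunk = next(self.completion_stream)  # raises StopIteration when the stream ends
        if self.model == "replicate":
            text = chunk                       # replicate's run() yields plain strings
        else:
            text = chunk.completion            # assumption: anthropic stream chunks carry .completion
        return {"choices": [{"delta": text}]}
```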
Deleted file (1 line removed):

@@ -1 +0,0 @@
-test 1
litellm/tests/test_completion.py

@@ -7,8 +7,10 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
 import pytest
 import litellm
 from litellm import embedding, completion
+from infisical import InfisicalClient
 
 # litellm.set_verbose = True
+litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
 
 user_message = "Hello, whats the weather in San Francisco??"
 messages = [{ "content": user_message,"role": "user"}]

@@ -16,6 +18,59 @@ messages = [{ "content": user_message,"role": "user"}]
 def logger_fn(user_model_dict):
     print(f"user_model_dict: {user_model_dict}")
 
+def test_completion_claude():
+    try:
+        response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+def test_completion_claude_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "how does a court case get to the Supreme Court?"}
+        ]
+        response = completion(model="claude-2", messages=messages, stream=True)
+        # Add any assertions here to check the response
+        for chunk in response:
+            print(chunk['choices'][0]['delta'])  # same as openai format
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+def test_completion_hf_api():
+    try:
+        user_message = "write some code to find the sum of two numbers"
+        messages = [{ "content": user_message,"role": "user"}]
+        response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+def test_completion_cohere():
+    try:
+        response = completion(model="command-nightly", messages=messages, max_tokens=500)
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+def test_completion_cohere_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "how does a court case get to the Supreme Court?"}
+        ]
+        response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
+        # Add any assertions here to check the response
+        for chunk in response:
+            print(chunk['choices'][0]['delta'])  # same as openai format
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
 def test_completion_openai():
     try:
         response = completion(model="gpt-3.5-turbo", messages=messages)

@@ -92,18 +147,25 @@ def test_completion_azure():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_claude():
+# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
+def test_completion_replicate_llama_stream():
+    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
     try:
-        response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+        response = completion(model=model_name, messages=messages, stream=True)
         # Add any assertions here to check the response
+        for result in response:
+            print(result)
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_cohere():
+def test_completion_replicate_stability_stream():
+    model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
     try:
-        response = completion(model="command-nightly", messages=messages, max_tokens=500)
+        response = completion(model=model_name, messages=messages, stream=True, replicate=True)
         # Add any assertions here to check the response
+        for result in response:
+            print(result)
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

@@ -124,3 +186,14 @@
 #       pass
 #   else:
 #       pytest.fail(f"Error occurred: {e}")
+
+def test_completion_replicate_stability():
+    model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
+    try:
+        response = completion(model=model_name, messages=messages, replicate=True)
+        # Add any assertions here to check the response
+        for result in response:
+            print(result)
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
litellm/tests/test_embedding.py (new file, 20 lines)

import sys, os
import traceback
import pytest

sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient

# litellm.set_verbose = True
litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])

def test_openai_embedding():
    try:
        response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
        # Add any assertions here to check the response
        print(f"response: {str(response)}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
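Both new test modules route their API keys through `litellm.secret_manager_client` (an Infisical client) while the library code keeps calling `get_secret(...)`. The diff does not show `get_secret` itself; the following is only a plausible sketch of the lookup order it implies, checking the secret manager first and falling back to environment variables, and it assumes the Infisical client exposes `get_secret(name).secret_value`:

```python
import os
import litellm

def get_secret_sketch(secret_name):
    # Illustrative: prefer the configured secret manager, if any...
    client = getattr(litellm, "secret_manager_client", None)
    if client is not None:
        # assumption: InfisicalClient.get_secret(...) returns an object with .secret_value
        secret = client.get_secret(secret_name).secret_value
        if secret:
            return secret
    # ...then fall back to plain environment variables.
    return os.environ.get(secret_name)
```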
195
litellm/utils.py
195
litellm/utils.py
|
@ -4,7 +4,6 @@ import subprocess, os
|
||||||
import litellm, openai
|
import litellm, openai
|
||||||
import random, uuid, requests
|
import random, uuid, requests
|
||||||
import datetime, time
|
import datetime, time
|
||||||
from anthropic import Anthropic
|
|
||||||
import tiktoken
|
import tiktoken
|
||||||
encoding = tiktoken.get_encoding("cl100k_base")
|
encoding = tiktoken.get_encoding("cl100k_base")
|
||||||
from .integrations.helicone import HeliconeLogger
|
from .integrations.helicone import HeliconeLogger
|
||||||
|
@@ -34,6 +33,19 @@ def print_verbose(print_statement):
     if random.random() <= 0.3:
         print("Get help - https://discord.com/invite/wuPM9dRgDw")
+
+####### Package Import Handler ###################
+import importlib
+import subprocess
+def install_and_import(package):
+    try:
+        importlib.import_module(package)
+    except ImportError:
+        print(f"{package} is not installed. Installing...")
+        subprocess.call([sys.executable, '-m', 'pip', 'install', package])
+    finally:
+        globals()[package] = importlib.import_module(package)
+##################################################
 
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
 def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None):
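The new install_and_import helper pip-installs a provider SDK the first time it is needed and binds the module into globals(). A usage sketch, mirroring how token_counter uses it later in this diff:

```python
# minimal sketch: lazily pull in the anthropic SDK only when it is actually needed
install_and_import('anthropic')                            # pip-installs anthropic if it is missing
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT   # import succeeds once the call above returns
```

This is consistent with the dependency changes further down in this commit, where anthropic, cohere, and replicate are dropped from pyproject.toml and requirements.txt and are instead installed on demand.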
@@ -119,6 +131,51 @@ def client(original_function):
             raise e
     return wrapper
+
+####### USAGE CALCULATOR ################
+
+def token_counter(model, text):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
+
+def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = 0):
+    ## given
+    prompt_tokens_cost_usd_dollar = 0
+    completion_tokens_cost_usd_dollar = 0
+    model_cost_ref = litellm.model_cost
+    if model in model_cost_ref:
+        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    else:
+        # calculate average input cost
+        input_cost_sum = 0
+        output_cost_sum = 0
+        model_cost_ref = litellm.model_cost
+        for model in model_cost_ref:
+            input_cost_sum += model_cost_ref[model]["input_cost_per_token"]
+            output_cost_sum += model_cost_ref[model]["output_cost_per_token"]
+        avg_input_cost = input_cost_sum / len(model_cost_ref.keys())
+        avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
+        prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
+        completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+
+
+def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""):
+    prompt_tokens = tokenizer(model=model, text=prompt)
+    completion_tokens = tokenizer(model=model, text=completion)
+    prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens = prompt_tokens, completion_tokens = completion_tokens)
+    return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
 
 ####### HELPER FUNCTIONS ################
 def get_optional_params(
     # 12 optional params
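A usage sketch for the usage-calculator helpers added in the hunk above (not part of the diff). Note that completion_cost calls tokenizer(), which appears to refer to the token_counter helper defined just before it, so the sketch counts tokens explicitly; it also assumes "gpt-3.5-turbo" has an entry in litellm.model_cost, otherwise the average-cost fallback applies.

```python
# minimal sketch: estimate the USD cost of a prompt/completion pair
from litellm.utils import token_counter, cost_per_token  # assumption: helpers are importable from litellm.utils

prompt = "Hello, how are you?"
completion_text = "I am doing well, thank you!"

prompt_tokens = token_counter(model="gpt-3.5-turbo", text=prompt)
completion_tokens = token_counter(model="gpt-3.5-turbo", text=completion_text)

# cost_per_token returns (prompt cost, completion cost) in USD
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
print(f"estimated cost: ${prompt_cost + completion_cost_usd}")
```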
@@ -134,35 +191,66 @@ def get_optional_params(
     frequency_penalty = 0,
     logit_bias = {},
     user = "",
-    deployment_id = None
+    deployment_id = None,
+    model = None,
+    replicate = False,
+    hugging_face = False,
 ):
     optional_params = {}
-    if functions != []:
-        optional_params["functions"] = functions
-    if function_call != "":
-        optional_params["function_call"] = function_call
-    if temperature != 1:
-        optional_params["temperature"] = temperature
-    if top_p != 1:
-        optional_params["top_p"] = top_p
-    if n != 1:
-        optional_params["n"] = n
-    if stream:
-        optional_params["stream"] = stream
-    if stop != None:
-        optional_params["stop"] = stop
-    if max_tokens != float('inf'):
-        optional_params["max_tokens"] = max_tokens
-    if presence_penalty != 0:
-        optional_params["presence_penalty"] = presence_penalty
-    if frequency_penalty != 0:
-        optional_params["frequency_penalty"] = frequency_penalty
-    if logit_bias != {}:
-        optional_params["logit_bias"] = logit_bias
-    if user != "":
-        optional_params["user"] = user
-    if deployment_id != None:
-        optional_params["deployment_id"] = deployment_id
+    if model in litellm.anthropic_models:
+        # handle anthropic params
+        if stream:
+            optional_params["stream"] = stream
+        if stop != None:
+            optional_params["stop_sequences"] = stop
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if top_p != 1:
+            optional_params["top_p"] = top_p
+        return optional_params
+    elif model in litellm.cohere_models:
+        # handle cohere params
+        if stream:
+            optional_params["stream"] = stream
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if max_tokens != float('inf'):
+            optional_params["max_tokens"] = max_tokens
+        return optional_params
+    elif replicate == True:
+        # any replicate models
+        # TODO: handle translating remaining replicate params
+        if stream:
+            optional_params["stream"] = stream
+        return optional_params
+    else: # assume passing in params for openai/azure openai
+        if functions != []:
+            optional_params["functions"] = functions
+        if function_call != "":
+            optional_params["function_call"] = function_call
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if top_p != 1:
+            optional_params["top_p"] = top_p
+        if n != 1:
+            optional_params["n"] = n
+        if stream:
+            optional_params["stream"] = stream
+        if stop != None:
+            optional_params["stop"] = stop
+        if max_tokens != float('inf'):
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty != 0:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty != 0:
+            optional_params["frequency_penalty"] = frequency_penalty
+        if logit_bias != {}:
+            optional_params["logit_bias"] = logit_bias
+        if user != "":
+            optional_params["user"] = user
+        if deployment_id != None:
+            optional_params["deployment_id"] = deployment_id
+        return optional_params
     return optional_params
 
 def set_callbacks(callback_list):
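A sketch of what the provider-aware branching above produces (not part of the diff). It assumes the remaining parameters keep the defaults shown in the signature, that the helpers are importable from litellm.utils, and that "claude-instant-1" is listed in litellm.anthropic_models.

```python
# minimal sketch: the anthropic branch renames stop -> stop_sequences and drops unsupported params
from litellm.utils import get_optional_params  # assumption: importable from litellm.utils

params = get_optional_params(stream=True, stop=["Human:"], temperature=0.5, model="claude-instant-1")
# expected, per the anthropic branch above:
# {"stream": True, "stop_sequences": ["Human:"], "temperature": 0.5}

params = get_optional_params(stream=True, stop=["\n"], temperature=0.5)
# expected, per the default openai/azure branch:
# {"temperature": 0.5, "stream": True, "stop": ["\n"]}
```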
@@ -324,19 +412,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
         logging(logger_fn=user_logger_fn, exception=e)
         pass
 
-def prompt_token_calculator(model, messages):
-    # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
-    num_tokens = 0
-    if "claude" in model:
-        anthropic = Anthropic()
-        num_tokens = anthropic.count_tokens(text)
-    else:
-        num_tokens = len(encoding.encode(text))
-    return num_tokens
-
-
 def handle_success(args, kwargs, result, start_time, end_time):
     global heliconeLogger, aispendLogger
     try:
@@ -396,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
         print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
         pass
 
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
 # integration helper function
 def modify_integration(integration_name, integration_params):
     global supabaseClient
@@ -520,3 +608,30 @@ def get_secret(secret_name):
         return os.environ.get(secret_name)
     else:
         return os.environ.get(secret_name)
+
+
+######## Streaming Class ############################
+# wraps the completion stream to return the correct format for the model
+# replicate/anthropic/cohere
+class CustomStreamWrapper:
+    def __init__(self, completion_stream, model):
+        self.model = model
+        if model in litellm.cohere_models:
+            # cohere does not return an iterator, so we need to wrap it in one
+            self.completion_stream = iter(completion_stream)
+        else:
+            self.completion_stream = completion_stream
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.model in litellm.anthropic_models:
+            chunk = next(self.completion_stream)
+            return {"choices": [{"delta": chunk.completion}]}
+        elif self.model == "replicate":
+            chunk = next(self.completion_stream)
+            return {"choices": [{"delta": chunk}]}
+        elif self.model in litellm.cohere_models:
+            chunk = next(self.completion_stream)
+            return {"choices": [{"delta": chunk.text}]}
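A sketch of how the new wrapper is meant to be consumed (not part of the diff): completion() hands it the raw provider stream, and callers then read OpenAI-style delta chunks regardless of the provider. Here raw_stream is a hypothetical stream object returned by a provider SDK, and the model name is assumed to appear in litellm.anthropic_models.

```python
# minimal sketch: normalize an anthropic/cohere/replicate stream into OpenAI-style delta chunks
from litellm.utils import CustomStreamWrapper  # assumption: importable from litellm.utils

# raw_stream is whatever the provider SDK returned for a streaming call (illustration only)
wrapped = CustomStreamWrapper(raw_stream, model="claude-instant-1")
for chunk in wrapped:
    print(chunk["choices"][0]["delta"], end="")
```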
mkdocs.yml
@@ -6,6 +6,8 @@ nav:
   - Input - Request Body: input.md
   - Output - Response Object: output.md
   - Streaming & Async Calls: stream.md
+  - token usage:
+    - Helper Functions: token_usage.md
   - 🤖 Supported LLM APIs:
     - Supported Completion & Chat APIs: supported.md
     - Supported Embedding APIs: supported_embedding.md
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.356"
+version = "0.1.367"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -8,14 +8,8 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.8"
-openai = {extras = ["datalib"], version = "^0.27.8"}
-cohere = "^4.18.0"
-pytest = "^7.4.0"
-pydantic = "^2.1.1"
-anthropic = "^0.3.7"
-replicate = "^0.10.0"
+openai = "^0.27.8"
 python-dotenv = "^1.0.0"
-tenacity = "^8.0.1"
 tiktoken = "^0.4.0"
 
 [build-system]
requirements.txt
@@ -1,11 +1,5 @@
-pydantic
+# used by CI/CD testing
 openai
-cohere
-anthropic
-replicate
-pytest
 python-dotenv
-openai[datalib]
+openai
-tenacity
 tiktoken
-infisical