Compare commits

..

25 commits

Author SHA1 Message Date
sweep-ai[bot]
521621c3b1
Merge main into sweep/add-sweep-config 2023-08-01 18:32:52 +00:00
sweep-ai[bot]
ae3a6c1391
Merge main into sweep/add-sweep-config 2023-08-01 18:27:35 +00:00
sweep-ai[bot]
14327e2bf5
Merge main into sweep/add-sweep-config 2023-08-01 17:54:22 +00:00
sweep-ai[bot]
61ec91d69e
Merge main into sweep/add-sweep-config 2023-08-01 17:40:01 +00:00
sweep-ai[bot]
44adf0b53a
Update requirements.txt 2023-08-01 15:35:15 +00:00
sweep-ai[bot]
8f7ffbdc9e
Update requirements.txt 2023-08-01 15:34:14 +00:00
sweep-ai[bot]
42a7d8efdf
Update requirements.txt 2023-08-01 15:29:03 +00:00
sweep-ai[bot]
0714fd1bf4
Update requirements.txt 2023-08-01 15:28:17 +00:00
sweep-ai[bot]
ee48b14cf8
Merge main into sweep/add-sweep-config 2023-08-01 15:26:46 +00:00
sweep-ai[bot]
c4b4a2bd26
Update requirements.txt 2023-08-01 15:22:42 +00:00
sweep-ai[bot]
9b1066a03f
Merge main into sweep/add-sweep-config 2023-08-01 15:19:38 +00:00
sweep-ai[bot]
338800e846
Update requirements.txt 2023-08-01 15:19:14 +00:00
sweep-ai[bot]
04383dbc73
Merge main into sweep/add-sweep-config 2023-08-01 15:18:53 +00:00
sweep-ai[bot]
d7a611dfba
Update requirements.txt 2023-08-01 15:16:46 +00:00
sweep-ai[bot]
72b61da654
Update requirements.txt 2023-08-01 15:11:37 +00:00
sweep-ai[bot]
7b5c2e2c4b
Update requirements.txt 2023-08-01 15:08:34 +00:00
sweep-ai[bot]
2d21281eff
Update requirements.txt 2023-08-01 15:08:09 +00:00
sweep-ai[bot]
fac40ecdd5
Update requirements.txt 2023-08-01 15:06:08 +00:00
sweep-ai[bot]
2b80d79aef
Update requirements.txt 2023-08-01 15:05:33 +00:00
sweep-ai[bot]
fe59959678
Update build/lib/litellm/main.py 2023-08-01 14:57:30 +00:00
sweep-ai[bot]
b6fe7f7b0a
Update requirements.txt 2023-08-01 14:56:59 +00:00
sweep-ai[bot]
c8d32560aa
Create refactor template 2023-08-01 14:48:57 +00:00
sweep-ai[bot]
323b238d5d
Create feature template 2023-08-01 14:48:56 +00:00
sweep-ai[bot]
e354e516e2
Create bugfix template 2023-08-01 14:48:56 +00:00
sweep-ai[bot]
bdec7e82bc
Create sweep.yaml config file 2023-08-01 14:48:56 +00:00
1771 changed files with 1874 additions and 807804 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -1,11 +0,0 @@
# used by CI/CD testing
openai==1.54.0
python-dotenv
tiktoken
importlib_metadata
cohere
redis
anthropic
orjson==3.9.15
pydantic==2.7.1
google-cloud-aiplatform==1.43.0

View file

@ -1,52 +0,0 @@
{
"name": "Python 3.11",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:3.11-bookworm",
// https://github.com/devcontainers/images/tree/main/src/python
// https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
// "build": {
// "dockerfile": "Dockerfile",
// "context": ".."
// },
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance",
"GitHub.copilot",
"GitHub.copilot-chat",
"ms-python.autopep8"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
"forwardPorts": [4000],
"containerEnv": {
"LITELLM_LOG": "DEBUG"
},
// Use 'portsAttributes' to set default properties for specific forwarded ports.
// More info: https://containers.dev/implementors/json_reference/#port-attributes
"portsAttributes": {
"4000": {
"label": "LiteLLM Server",
"onAutoForward": "notify"
}
},
// More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "litellm",
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "pipx install poetry && poetry install -E extra_proxy -E proxy"
}

View file

@ -1,11 +0,0 @@
docs
cookbook
.circleci
.github
tests
.git
.github
.circleci
.devcontainer
*.tgz
log.txt

View file

@ -1,22 +1,4 @@
# OpenAI
OPENAI_API_KEY = ""
OPENAI_API_BASE = ""
# Cohere
COHERE_API_KEY = ""
# OpenRouter
OR_SITE_URL = ""
OR_APP_NAME = "LiteLLM Example app"
OR_API_KEY = ""
# Azure API base URL
AZURE_API_BASE = ""
# Azure API version
AZURE_API_VERSION = ""
# Azure API key
AZURE_API_KEY = ""
# Replicate
REPLICATE_API_KEY = ""
REPLICATE_API_TOKEN = ""
# Anthropic
ANTHROPIC_API_KEY = ""
# Infisical
INFISICAL_TOKEN = ""
OR_APP_NAME = "LiteLLM Example app"

46
.flake8
View file

@ -1,46 +0,0 @@
[flake8]
ignore =
# The following ignores can be removed when formatting using black
W191,W291,W292,W293,W391,W504
E101,E111,E114,E116,E117,E121,E122,E123,E124,E125,E126,E127,E128,E129,E131,
E201,E202,E221,E222,E225,E226,E231,E241,E251,E252,E261,E265,E271,E272,E275,
E301,E302,E303,E305,E306,
# line break before binary operator
W503,
# inline comment should start with '# '
E262,
# too many leading '#' for block comment
E266,
# multiple imports on one line
E401,
# module level import not at top of file
E402,
# Line too long (82 > 79 characters)
E501,
# comparison to None should be 'if cond is None:'
E711,
# comparison to True should be 'if cond is True:' or 'if cond:'
E712,
# do not compare types, for exact checks use `is` / `is not`, for instance checks use `isinstance()`
E721,
# do not use bare 'except'
E722,
# x is imported but unused
F401,
# 'from . import *' used; unable to detect undefined names
F403,
# x may be undefined, or defined from star imports:
F405,
# f-string is missing placeholders
F541,
# dictionary key '' repeated with different values
F601,
# redefinition of unused x from line 123
F811,
# undefined name x
F821,
# local variable x is assigned to but never used
F841,
# https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
extend-ignore = E203

View file

@ -1,10 +0,0 @@
# Add the commit hash of any commit you want to ignore in `git blame` here.
# One commit hash per line.
#
# The GitHub Blame UI will use this file automatically!
#
# Run this command to always ignore formatting commits in `git blame`
# git config blame.ignoreRevsFile .git-blame-ignore-revs
# Update pydantic code to fix warnings (GH-3600)
876840e9957bc7e9f7d6a2b58c4d7c53dad16481

1
.gitattributes vendored
View file

@ -1 +0,0 @@
*.ipynb linguist-vendored

13
.github/FUNDING.yml vendored
View file

@ -1,13 +0,0 @@
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: https://buy.stripe.com/9AQ03Kd3P91o0Q8bIS

View file

@ -1,32 +0,0 @@
name: Bug Report
description: File a bug report
title: "[Bug]: "
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
value: "A bug happened!"
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
- type: input
id: contact
attributes:
label: Twitter / LinkedIn details
description: We announce new features on Twitter + LinkedIn. If this issue leads to an announcement, and you'd like a mention, we'll gladly shout you out!
placeholder: ex. @krrish_dh / https://www.linkedin.com/in/krish-d/
validations:
required: false

View file

@ -1,8 +0,0 @@
blank_issues_enabled: true
contact_links:
- name: Schedule Demo
url: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat
about: Speak directly with Krrish and Ishaan, the founders, to discuss issues, share feedback, or explore improvements for LiteLLM
- name: Discord
url: https://discord.com/invite/wuPM9dRgDw
about: Join 250+ LiteLLM community members!

View file

@ -1,32 +0,0 @@
name: 🚀 Feature Request
description: Submit a proposal/request for a new LiteLLM feature.
title: "[Feature]: "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
Thanks for making LiteLLM better!
- type: textarea
id: the-feature
attributes:
label: The Feature
description: A clear and concise description of the feature proposal
placeholder: Tell us what you want!
validations:
required: true
- type: textarea
id: motivation
attributes:
label: Motivation, pitch
description: Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., "I'm working on X and would like Y to be possible". If this is related to another GitHub issue, please link here too.
validations:
required: true
- type: input
id: contact
attributes:
label: Twitter / LinkedIn details
description: We announce new features on Twitter + LinkedIn. When this is announced, and you'd like a mention, we'll gladly shout you out!
placeholder: ex. @krrish_dh / https://www.linkedin.com/in/krish-d/
validations:
required: false

11
.github/ISSUE_TEMPLATE/sweep-bugfix.yml vendored Normal file
View file

@ -0,0 +1,11 @@
name: Bugfix
title: 'Sweep: '
description: Write something like "We notice ... behavior when ... happens instead of ...""
labels: sweep
body:
- type: textarea
id: description
attributes:
label: Details
description: More details about the bug
placeholder: The bug might be in ... file

View file

@ -0,0 +1,11 @@
name: Feature Request
title: 'Sweep: '
description: Write something like "Write an api endpoint that does "..." in the "..." file"
labels: sweep
body:
- type: textarea
id: description
attributes:
label: Details
description: More details for Sweep
placeholder: The new endpoint should use the ... class from ... file because it contains ... logic

View file

@ -0,0 +1,11 @@
name: Refactor
title: 'Sweep: '
description: Write something like "Modify the ... api endpoint to use ... version and ... framework"
labels: sweep
body:
- type: textarea
id: description
attributes:
label: Details
description: More details for Sweep
placeholder: We are migrating this function to ... version because ...

View file

@ -1,77 +0,0 @@
name: Helm OCI Chart Releaser
description: Push Helm charts to OCI-based (Docker) registries
author: sergeyshaykhullin
branding:
color: yellow
icon: upload-cloud
inputs:
name:
required: true
description: Chart name
repository:
required: true
description: Chart repository name
tag:
required: true
description: Chart version
app_version:
required: true
description: App version
path:
required: false
description: Chart path (Default 'charts/{name}')
registry:
required: true
description: OCI registry
registry_username:
required: true
description: OCI registry username
registry_password:
required: true
description: OCI registry password
update_dependencies:
required: false
default: 'false'
description: Update chart dependencies before packaging (Default 'false')
outputs:
image:
value: ${{ steps.output.outputs.image }}
description: Chart image (Default '{registry}/{repository}/{image}:{tag}')
runs:
using: composite
steps:
- name: Helm | Login
shell: bash
run: echo ${{ inputs.registry_password }} | helm registry login -u ${{ inputs.registry_username }} --password-stdin ${{ inputs.registry }}
env:
HELM_EXPERIMENTAL_OCI: '1'
- name: Helm | Dependency
if: inputs.update_dependencies == 'true'
shell: bash
run: helm dependency update ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
env:
HELM_EXPERIMENTAL_OCI: '1'
- name: Helm | Package
shell: bash
run: helm package ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }} --version ${{ inputs.tag }} --app-version ${{ inputs.app_version }}
env:
HELM_EXPERIMENTAL_OCI: '1'
- name: Helm | Push
shell: bash
run: helm push ${{ inputs.name }}-${{ inputs.tag }}.tgz oci://${{ inputs.registry }}/${{ inputs.repository }}
env:
HELM_EXPERIMENTAL_OCI: '1'
- name: Helm | Logout
shell: bash
run: helm registry logout ${{ inputs.registry }}
env:
HELM_EXPERIMENTAL_OCI: '1'
- name: Helm | Output
id: output
shell: bash
run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT

View file

@ -1,10 +0,0 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
groups:
github-actions:
patterns:
- "*"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

View file

@ -1,29 +0,0 @@
## Title
<!-- e.g. "Implement user authentication feature" -->
## Relevant issues
<!-- e.g. "Fixes #000" -->
## Type
<!-- Select the type of Pull Request -->
<!-- Keep only the necessary ones -->
🆕 New Feature
🐛 Bug Fix
🧹 Refactoring
📖 Documentation
🚄 Infrastructure
✅ Test
## Changes
<!-- List of changes -->
## [REQUIRED] Testing - Attach a screenshot of any new tests passing locall
If UI changes, send a screenshot/GIF of working UI fixes
<!-- Test procedure -->

94
.github/template.yaml vendored
View file

@ -1,94 +0,0 @@
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: >
llmlite-service
SAM Template for llmlite-service
# More info about Globals: https://github.com/awslabs/serverless-application-model/blob/master/docs/globals.rst
Globals:
Function:
Timeout: 600
MemorySize: 128
Environment:
Variables:
WORKER_CONFIG: !Ref WorkerConfigParameter
Parameters:
AliasParameter:
Type: String
Default: live
WorkerConfigParameter:
Type: String
Description: Sample environment variable
Default: '{"model": null, "alias": null, "api_base": null, "api_version": "2023-07-01-preview", "debug": false, "temperature": null, "max_tokens": null, "request_timeout": 600, "max_budget": null, "telemetry": true, "drop_params": false, "add_function_to_prompt": false, "headers": null, "save": false, "config": null, "use_queue": false}'
Resources:
MyUrlFunctionPermissions:
Type: AWS::Lambda::Permission
Properties:
FunctionName: !Ref URL
Action: lambda:InvokeFunctionUrl
Principal: "*"
FunctionUrlAuthType: NONE
Function:
Type: AWS::Serverless::Function
Properties:
FunctionName: !Sub "${AWS::StackName}-function"
CodeUri: "./litellm"
Handler: proxy/lambda.handler
Runtime: python3.11
AutoPublishAlias: !Ref AliasParameter
Architectures:
- x86_64
DeploymentPreference:
Type: AllAtOnce
Alarms:
- !Ref NewVersionErrorMetricGreaterThanZeroAlarm
NewVersionErrorMetricGreaterThanZeroAlarm:
Type: "AWS::CloudWatch::Alarm"
Properties:
AlarmDescription: Lambda Function Error > 0
ComparisonOperator: GreaterThanThreshold
Dimensions:
- Name: Resource
Value: !Sub "${Function}:live"
- Name: FunctionName
Value: !Ref Function
- Name: ExecutedVersion
Value: !GetAtt Function.Version.Version
EvaluationPeriods: 1
Unit: Count
MetricName: Errors
Namespace: AWS/Lambda
Period: 60
Statistic: Sum
Threshold: 0
URL:
Type: AWS::Lambda::Url
DependsOn: FunctionAliaslive
Properties:
AuthType: NONE
Qualifier: live
TargetFunctionArn: !GetAtt Function.Arn
Outputs:
FunctionARN:
Description: "Lambda Function ARN"
Value: !GetAtt Function.Arn
FunctionUrl:
Description: "Lambda Function URL Endpoint"
Value:
Fn::GetAtt: URL.FunctionUrl
FunctionVersion:
Description: "Lambda Function Version"
Value: !GetAtt Function.Version.Version
FunctionNewAlarmARN:
Description: "Lambda Function New Alarm ARN"
Value: !GetAtt NewVersionErrorMetricGreaterThanZeroAlarm.Arn

View file

@ -1,28 +0,0 @@
name: Updates model_prices_and_context_window.json and Create Pull Request
on:
schedule:
- cron: "0 0 * * 0" # Run every Sundays at midnight
#- cron: "0 0 * * *" # Run daily at midnight
jobs:
auto_update_price_and_context_window:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install Dependencies
run: |
pip install aiohttp
- name: Update JSON Data
run: |
python ".github/workflows/auto_update_price_and_context_window_file.py"
- name: Create Pull Request
run: |
git add model_prices_and_context_window.json
git commit -m "Update model_prices_and_context_window.json file: $(date +'%Y-%m-%d')"
gh pr create --title "Update model_prices_and_context_window.json file" \
--body "Automated update for model_prices_and_context_window.json" \
--head auto-update-price-and-context-window-$(date +'%Y-%m-%d') \
--base main
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}

View file

@ -1,121 +0,0 @@
import asyncio
import aiohttp
import json
# Asynchronously fetch data from a given URL
async def fetch_data(url):
try:
# Create an asynchronous session
async with aiohttp.ClientSession() as session:
# Send a GET request to the URL
async with session.get(url) as resp:
# Raise an error if the response status is not OK
resp.raise_for_status()
# Parse the response JSON
resp_json = await resp.json()
print("Fetch the data from URL.")
# Return the 'data' field from the JSON response
return resp_json['data']
except Exception as e:
# Print an error message if fetching data fails
print("Error fetching data from URL:", e)
return None
# Synchronize local data with remote data
def sync_local_data_with_remote(local_data, remote_data):
# Update existing keys in local_data with values from remote_data
for key in (set(local_data) & set(remote_data)):
local_data[key].update(remote_data[key])
# Add new keys from remote_data to local_data
for key in (set(remote_data) - set(local_data)):
local_data[key] = remote_data[key]
# Write data to the json file
def write_to_file(file_path, data):
try:
# Open the file in write mode
with open(file_path, "w") as file:
# Dump the data as JSON into the file
json.dump(data, file, indent=4)
print("Values updated successfully.")
except Exception as e:
# Print an error message if writing to file fails
print("Error updating JSON file:", e)
# Update the existing models and add the missing models
def transform_remote_data(data):
transformed = {}
for row in data:
# Add the fields 'max_tokens' and 'input_cost_per_token'
obj = {
"max_tokens": row["context_length"],
"input_cost_per_token": float(row["pricing"]["prompt"]),
}
# Add 'max_output_tokens' as a field if it is not None
if "top_provider" in row and "max_completion_tokens" in row["top_provider"] and row["top_provider"]["max_completion_tokens"] is not None:
obj['max_output_tokens'] = int(row["top_provider"]["max_completion_tokens"])
# Add the field 'output_cost_per_token'
obj.update({
"output_cost_per_token": float(row["pricing"]["completion"]),
})
# Add field 'input_cost_per_image' if it exists and is non-zero
if "pricing" in row and "image" in row["pricing"] and float(row["pricing"]["image"]) != 0.0:
obj['input_cost_per_image'] = float(row["pricing"]["image"])
# Add the fields 'litellm_provider' and 'mode'
obj.update({
"litellm_provider": "openrouter",
"mode": "chat"
})
# Add the 'supports_vision' field if the modality is 'multimodal'
if row.get('architecture', {}).get('modality') == 'multimodal':
obj['supports_vision'] = True
# Use a composite key to store the transformed object
transformed[f'openrouter/{row["id"]}'] = obj
return transformed
# Load local data from a specified file
def load_local_data(file_path):
try:
# Open the file in read mode
with open(file_path, "r") as file:
# Load and return the JSON data
return json.load(file)
except FileNotFoundError:
# Print an error message if the file is not found
print("File not found:", file_path)
return None
except json.JSONDecodeError as e:
# Print an error message if JSON decoding fails
print("Error decoding JSON:", e)
return None
def main():
local_file_path = "model_prices_and_context_window.json" # Path to the local data file
url = "https://openrouter.ai/api/v1/models" # URL to fetch remote data
# Load local data from file
local_data = load_local_data(local_file_path)
# Fetch remote data asynchronously
remote_data = asyncio.run(fetch_data(url))
# Transform the fetched remote data
remote_data = transform_remote_data(remote_data)
# If both local and remote data are available, synchronize and save
if local_data and remote_data:
sync_local_data_with_remote(local_data, remote_data)
write_to_file(local_file_path, local_data)
else:
print("Failed to fetch model data from either local file or URL.")
# Entry point of the script
if __name__ == "__main__":
main()

View file

@ -1,374 +0,0 @@
# this workflow is triggered by an API call when there is a new PyPI release of LiteLLM
name: Build, Publish LiteLLM Docker Image. New Release
on:
workflow_dispatch:
inputs:
tag:
description: "The tag version you want to build"
release_type:
description: "The release type you want to build. Can be 'latest', 'stable', 'dev'"
type: string
default: "latest"
commit_hash:
description: "Commit hash"
required: true
# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
CHART_NAME: litellm-helm
# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
# print commit hash, tag, and release type
print:
runs-on: ubuntu-latest
steps:
- run: |
echo "Commit hash: ${{ github.event.inputs.commit_hash }}"
echo "Tag: ${{ github.event.inputs.tag }}"
echo "Release type: ${{ github.event.inputs.release_type }}"
docker-hub-deploy:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
-
name: Set up QEMU
uses: docker/setup-qemu-action@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
-
name: Build and push litellm-database image
uses: docker/build-push-action@v5
with:
context: .
push: true
file: ./docker/Dockerfile.database
tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
-
name: Build and push litellm-spend-logs image
uses: docker/build-push-action@v5
with:
context: .
push: true
file: ./litellm-js/spend-logs/Dockerfile
tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
build-and-push-image:
runs-on: ubuntu-latest
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
packages: write
#
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
# Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
# This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
# It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
# It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
- name: Build and push Docker image
uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
labels: ${{ steps.meta.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
build-and-push-image-database:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for database Dockerfile
id: meta-database
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-database
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
- name: Build and push Database Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: ./docker/Dockerfile.database
push: true
tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
labels: ${{ steps.meta-database.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
build-and-push-image-non_root:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for non_root Dockerfile
id: meta-non_root
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-non_root
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
- name: Build and push non_root Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: ./docker/Dockerfile.non_root
push: true
tags: ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
labels: ${{ steps.meta-non_root.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
build-and-push-image-spend-logs:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for spend-logs Dockerfile
id: meta-spend-logs
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
- name: Build and push Database Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: ./litellm-js/spend-logs/Dockerfile
push: true
tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
build-and-push-helm-chart:
if: github.event.inputs.release_type != 'dev'
needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: lowercase github.repository_owner
run: |
echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}
- name: Get LiteLLM Latest Tag
id: current_app_tag
shell: bash
run: |
LATEST_TAG=$(git describe --tags --exclude "*dev*" --abbrev=0)
if [ -z "${LATEST_TAG}" ]; then
echo "latest_tag=latest" | tee -a $GITHUB_OUTPUT
else
echo "latest_tag=${LATEST_TAG}" | tee -a $GITHUB_OUTPUT
fi
- name: Get last published chart version
id: current_version
shell: bash
run: |
CHART_LIST=$(helm show chart oci://${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.CHART_NAME }} 2>/dev/null || true)
if [ -z "${CHART_LIST}" ]; then
echo "current-version=0.1.0" | tee -a $GITHUB_OUTPUT
else
printf '%s' "${CHART_LIST}" | grep '^version:' | awk 'BEGIN{FS=":"}{print "current-version="$2}' | tr -d " " | tee -a $GITHUB_OUTPUT
fi
env:
HELM_EXPERIMENTAL_OCI: '1'
# Automatically update the helm chart version one "patch" level
- name: Bump release version
id: bump_version
uses: christian-draeger/increment-semantic-version@1.1.0
with:
current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
version-fragment: 'bug'
- uses: ./.github/actions/helm-oci-chart-releaser
with:
name: ${{ env.CHART_NAME }}
repository: ${{ env.REPO_OWNER }}
tag: ${{ github.event.inputs.chartVersion || steps.bump_version.outputs.next-version || '0.1.0' }}
app_version: ${{ steps.current_app_tag.outputs.latest_tag }}
path: deploy/charts/${{ env.CHART_NAME }}
registry: ${{ env.REGISTRY }}
registry_username: ${{ github.actor }}
registry_password: ${{ secrets.GITHUB_TOKEN }}
update_dependencies: true
release:
name: "New LiteLLM Release"
needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
runs-on: "ubuntu-latest"
steps:
- name: Display version
run: echo "Current version is ${{ github.event.inputs.tag }}"
- name: "Set Release Tag"
run: echo "RELEASE_TAG=${{ github.event.inputs.tag }}" >> $GITHUB_ENV
- name: Display release tag
run: echo "RELEASE_TAG is $RELEASE_TAG"
- name: "Create release"
uses: "actions/github-script@v6"
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
script: |
const commitHash = "${{ github.event.inputs.commit_hash}}";
console.log("Commit Hash:", commitHash); // Add this line for debugging
try {
const response = await github.rest.repos.createRelease({
draft: false,
generate_release_notes: true,
target_commitish: commitHash,
name: process.env.RELEASE_TAG,
owner: context.repo.owner,
prerelease: false,
repo: context.repo.repo,
tag_name: process.env.RELEASE_TAG,
});
core.exportVariable('RELEASE_ID', response.data.id);
core.exportVariable('RELEASE_UPLOAD_URL', response.data.upload_url);
} catch (error) {
core.setFailed(error.message);
}
- name: Fetch Release Notes
id: release-notes
uses: actions/github-script@v6
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
script: |
try {
const response = await github.rest.repos.getRelease({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: process.env.RELEASE_ID,
});
const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
return formattedBody;
} catch (error) {
core.setFailed(error.message);
}
env:
RELEASE_ID: ${{ env.RELEASE_ID }}
- name: Github Releases To Discord
env:
WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
REALEASE_TAG: ${{ env.RELEASE_TAG }}
RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
run: |
curl -H "Content-Type: application/json" -X POST -d '{
"content": "New LiteLLM release '"${RELEASE_TAG}"'",
"username": "Release Changelog",
"avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
"embeds": [
{
"title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
"description": "'"${RELEASE_NOTES}"'",
"color": 2105893
}
]
}' $WEBHOOK_URL

View file

@ -1,67 +0,0 @@
# this workflow is triggered by an API call when there is a new PyPI release of LiteLLM
name: Build, Publish LiteLLM Helm Chart. New Release
on:
workflow_dispatch:
inputs:
chartVersion:
description: "Update the helm chart's version to this"
# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
REPO_OWNER: ${{github.repository_owner}}
# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
build-and-push-helm-chart:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: lowercase github.repository_owner
run: |
echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}
- name: Get LiteLLM Latest Tag
id: current_app_tag
uses: WyriHaximus/github-action-get-previous-tag@v1.3.0
- name: Get last published chart version
id: current_version
shell: bash
run: helm show chart oci://${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/litellm-helm | grep '^version:' | awk 'BEGIN{FS=":"}{print "current-version="$2}' | tr -d " " | tee -a $GITHUB_OUTPUT
env:
HELM_EXPERIMENTAL_OCI: '1'
# Automatically update the helm chart version one "patch" level
- name: Bump release version
id: bump_version
uses: christian-draeger/increment-semantic-version@1.1.0
with:
current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
version-fragment: 'bug'
- name: Lint helm chart
run: helm lint deploy/charts/litellm-helm
- uses: ./.github/actions/helm-oci-chart-releaser
with:
name: litellm-helm
repository: ${{ env.REPO_OWNER }}
tag: ${{ github.event.inputs.chartVersion || steps.bump_version.outputs.next-version || '0.1.0' }}
app_version: ${{ steps.current_app_tag.outputs.tag || 'latest' }}
path: deploy/charts/litellm-helm
registry: ${{ env.REGISTRY }}
registry_username: ${{ github.actor }}
registry_password: ${{ secrets.GITHUB_TOKEN }}
update_dependencies: true

View file

@ -1,113 +0,0 @@
import csv
import os
from github import Github
def interpret_results(csv_file):
with open(csv_file, newline="") as csvfile:
csvreader = csv.DictReader(csvfile)
rows = list(csvreader)
"""
in this csv reader
- Create 1 new column "Status"
- if a row has a median response time < 300 and an average response time < 300, Status = "Passed ✅"
- if a row has a median response time >= 300 or an average response time >= 300, Status = "Failed ❌"
- Order the table in this order Name, Status, Median Response Time, Average Response Time, Requests/s,Failures/s, Min Response Time, Max Response Time, all other columns
"""
# Add a new column "Status"
for row in rows:
median_response_time = float(
row["Median Response Time"].strip().rstrip("ms")
)
average_response_time = float(
row["Average Response Time"].strip().rstrip("s")
)
request_count = int(row["Request Count"])
failure_count = int(row["Failure Count"])
failure_percent = round((failure_count / request_count) * 100, 2)
# Determine status based on conditions
if (
median_response_time < 300
and average_response_time < 300
and failure_percent < 5
):
row["Status"] = "Passed ✅"
else:
row["Status"] = "Failed ❌"
# Construct Markdown table header
markdown_table = "| Name | Status | Median Response Time (ms) | Average Response Time (ms) | Requests/s | Failures/s | Request Count | Failure Count | Min Response Time (ms) | Max Response Time (ms) |"
markdown_table += (
"\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
)
# Construct Markdown table rows
for row in rows:
markdown_table += f"\n| {row['Name']} | {row['Status']} | {row['Median Response Time']} | {row['Average Response Time']} | {row['Requests/s']} | {row['Failures/s']} | {row['Request Count']} | {row['Failure Count']} | {row['Min Response Time']} | {row['Max Response Time']} |"
print("markdown table: ", markdown_table)
return markdown_table
if __name__ == "__main__":
csv_file = "load_test_stats.csv" # Change this to the path of your CSV file
markdown_table = interpret_results(csv_file)
# Update release body with interpreted results
github_token = os.getenv("GITHUB_TOKEN")
g = Github(github_token)
repo = g.get_repo(
"BerriAI/litellm"
) # Replace with your repository's username and name
latest_release = repo.get_latest_release()
print("got latest release: ", latest_release)
print(latest_release.title)
print(latest_release.tag_name)
release_version = latest_release.title
print("latest release body: ", latest_release.body)
print("markdown table: ", markdown_table)
# check if "Load Test LiteLLM Proxy Results" exists
existing_release_body = latest_release.body
if "Load Test LiteLLM Proxy Results" in latest_release.body:
# find the "Load Test LiteLLM Proxy Results" section and delete it
start_index = latest_release.body.find("Load Test LiteLLM Proxy Results")
existing_release_body = latest_release.body[:start_index]
docker_run_command = f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
"""
print("docker run command: ", docker_run_command)
new_release_body = (
existing_release_body
+ docker_run_command
+ "\n\n"
+ "### Don't want to maintain your internal proxy? get in touch 🎉"
+ "\nHosted Proxy Alpha: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
+ "\n\n"
+ "## Load Test LiteLLM Proxy Results"
+ "\n\n"
+ markdown_table
)
print("new release body: ", new_release_body)
try:
latest_release.update_release(
name=latest_release.tag_name,
message=new_release_body,
)
except Exception as e:
print(e)

View file

@ -1,59 +0,0 @@
name: Test Locust Load Test
on:
workflow_run:
workflows: ["Build, Publish LiteLLM Docker Image. New Release"]
types:
- completed
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v1
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install PyGithub
- name: re-deploy proxy
run: |
echo "Current working directory: $PWD"
ls
python ".github/workflows/redeploy_proxy.py"
env:
LOAD_TEST_REDEPLOY_URL1: ${{ secrets.LOAD_TEST_REDEPLOY_URL1 }}
LOAD_TEST_REDEPLOY_URL2: ${{ secrets.LOAD_TEST_REDEPLOY_URL2 }}
working-directory: ${{ github.workspace }}
- name: Run Load Test
id: locust_run
uses: BerriAI/locust-github-action@master
with:
LOCUSTFILE: ".github/workflows/locustfile.py"
URL: "https://post-release-load-test-proxy.onrender.com/"
USERS: "20"
RATE: "20"
RUNTIME: "300s"
- name: Process Load Test Stats
run: |
echo "Current working directory: $PWD"
ls
python ".github/workflows/interpret_load_test.py"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
working-directory: ${{ github.workspace }}
- name: Upload CSV as Asset to Latest Release
uses: xresloader/upload-to-github-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
file: "load_test_stats.csv;load_test.html"
update_latest_release: true
tag_name: "load-test"
overwrite: true

View file

@ -1,30 +0,0 @@
from locust import HttpUser, task, between, events
import json
import time
class MyUser(HttpUser):
wait_time = between(1, 5)
@task
def chat_completion(self):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
# Include any additional headers you may need for authentication, etc.
}
# Customize the payload with "model" and "messages" keys
payload = {
"model": "fake-openai-endpoint",
"messages": [
{"role": "system", "content": "You are a chat bot."},
{"role": "user", "content": "Hello, how are you?"},
],
# Add more data as necessary
}
# Make a POST request to the "chat/completions" endpoint
response = self.client.post("chat/completions", json=payload, headers=headers)
# Print or log the response if needed

View file

@ -1,34 +0,0 @@
name: Publish Dev Release to PyPI
on:
workflow_dispatch:
jobs:
publish-dev-release:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8 # Adjust the Python version as needed
- name: Install dependencies
run: pip install toml twine
- name: Read version from pyproject.toml
id: read-version
run: |
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
- name: Check if version exists on PyPI
id: check-version
run: |
set -e
if twine check --repository-url https://pypi.org/simple/ "litellm==$LITELLM_VERSION" >/dev/null 2>&1; then
echo "Version $LITELLM_VERSION already exists on PyPI. Skipping publish."

35
.github/workflows/publish_pypi.yml vendored Normal file
View file

@ -0,0 +1,35 @@
name: Publish to PyPI
on:
push:
branches:
- main # You can change this to the branch you want to publish from
paths:
- 'setup.py'
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8 # You can change this to the Python version required for your package
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install twine
pip install wheel
pip install --upgrade setuptools
- name: Build package
run: python setup.py sdist bdist_wheel
- name: Upload to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: twine upload dist/*

View file

@ -1,31 +0,0 @@
name: Read Version from pyproject.toml
on:
push:
branches:
- main # Change this to the default branch of your repository
jobs:
read-version:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8 # Adjust the Python version as needed
- name: Install dependencies
run: pip install toml
- name: Read version from pyproject.toml
id: read-version
run: |
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
- name: Display version
run: echo "Current version is $LITELLM_VERSION"

View file

@ -1,20 +0,0 @@
"""
redeploy_proxy.py
"""
import os
import requests
import time
# send a get request to this endpoint
deploy_hook1 = os.getenv("LOAD_TEST_REDEPLOY_URL1")
response = requests.get(deploy_hook1, timeout=20)
deploy_hook2 = os.getenv("LOAD_TEST_REDEPLOY_URL2")
response = requests.get(deploy_hook2, timeout=20)
print("SENT GET REQUESTS to re-deploy proxy")
print("sleeeping.... for 60s")
time.sleep(60)

View file

@ -1,27 +0,0 @@
Date,"Ben
Ashley",Tom Brooks,Jimmy Cooney,"Sue
Daniels",Berlinda Fong,Terry Jones,Angelina Little,Linda Smith
10/1,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE
10/2,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/3,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/4,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/5,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/6,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/7,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/8,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/9,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/10,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/11,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/12,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/13,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/14,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/15,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/16,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/17,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/18,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/19,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/20,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/21,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/22,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
10/23,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
Total,0,1,1,1,1,1,0,1
1 Date Ben Ashley Tom Brooks Jimmy Cooney Sue Daniels Berlinda Fong Terry Jones Angelina Little Linda Smith
2 10/1 FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
3 10/2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
4 10/3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
5 10/4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
6 10/5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
7 10/6 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
8 10/7 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
9 10/8 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
10 10/9 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
11 10/10 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
12 10/11 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
13 10/12 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
14 10/13 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
15 10/14 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
16 10/15 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
17 10/16 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
18 10/17 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
19 10/18 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
20 10/19 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
21 10/20 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
22 10/21 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
23 10/22 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
24 10/23 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
25 Total 0 1 1 1 1 1 0 1

44
.github/workflows/tests.yml vendored Normal file
View file

@ -0,0 +1,44 @@
name: liteLLM Dev Tests
on: [push, pull_request]
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_API_URL: ${{ secrets.POSTHOG_API_URL }}
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
SLACK_API_SECRET: ${{ secrets.SLACK_API_SECRET }}
SLACK_API_CHANNEL: ${{ secrets.SLACK_API_CHANNEL }}
SENTRY_API_URL: ${{ secrets.SENTRY_API_URL }}
SENTRY_API_TRACE_RATE: ${{ secrets.SENTRY_API_TRACE_RATE }}
jobs:
test:
name: Run Tests
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8 # Replace 'x' with the desired version (e.g., 3.6, 3.7, 3.8)
- name: Install dependencies
run: pip install -r requirements.txt
- name: Run tests
run: pytest litellm/tests
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}

View file

@ -1,54 +0,0 @@
import os
import requests
from datetime import datetime
# GitHub API endpoints
GITHUB_API_URL = "https://api.github.com"
REPO_OWNER = "BerriAI"
REPO_NAME = "litellm"
# GitHub personal access token (required for uploading release assets)
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")
# Headers for GitHub API requests
headers = {
"Accept": "application/vnd.github+json",
"Authorization": f"Bearer {GITHUB_ACCESS_TOKEN}",
"X-GitHub-Api-Version": "2022-11-28",
}
# Get the latest release
releases_url = f"{GITHUB_API_URL}/repos/{REPO_OWNER}/{REPO_NAME}/releases/latest"
response = requests.get(releases_url, headers=headers)
latest_release = response.json()
print("Latest release:", latest_release)
# Upload an asset to the latest release
upload_url = latest_release["upload_url"].split("{?")[0]
asset_name = "results_stats.csv"
asset_path = os.path.join(os.getcwd(), asset_name)
print("upload_url:", upload_url)
with open(asset_path, "rb") as asset_file:
asset_data = asset_file.read()
upload_payload = {
"name": asset_name,
"label": "Load test results",
"created_at": datetime.utcnow().isoformat() + "Z",
}
upload_headers = headers.copy()
upload_headers["Content-Type"] = "application/octet-stream"
upload_response = requests.post(
upload_url,
headers=upload_headers,
data=asset_data,
params=upload_payload,
)
if upload_response.status_code == 201:
print(f"Asset '{asset_name}' uploaded successfully to the latest release.")
else:
print(f"Failed to upload asset. Response: {upload_response.text}")

69
.gitignore vendored
View file

@ -1,68 +1 @@
.venv
.env
.newenv
newenv/*
litellm/proxy/myenv/*
litellm_uuid.txt
__pycache__/
*.pyc
bun.lockb
**/.DS_Store
.aider*
litellm_results.jsonl
secrets.toml
.gitignore
litellm/proxy/litellm_secrets.toml
litellm/proxy/api_log.json
.idea/
router_config.yaml
litellm_server/config.yaml
litellm/proxy/_secret_config.yaml
.aws-sam/
litellm/tests/aiologs.log
litellm/tests/exception_data.txt
litellm/tests/config_*.yaml
litellm/tests/langfuse.log
langfuse.log
.langfuse.log
litellm/tests/test_custom_logger.py
litellm/tests/langfuse.log
litellm/tests/dynamo*.log
.vscode/settings.json
litellm/proxy/log.txt
proxy_server_config_@.yaml
.gitignore
proxy_server_config_2.yaml
litellm/proxy/secret_managers/credentials.json
hosted_config.yaml
litellm/proxy/tests/node_modules
litellm/proxy/tests/package.json
litellm/proxy/tests/package-lock.json
ui/litellm-dashboard/.next
ui/litellm-dashboard/node_modules
ui/litellm-dashboard/next-env.d.ts
ui/litellm-dashboard/package.json
ui/litellm-dashboard/package-lock.json
deploy/charts/litellm/*.tgz
deploy/charts/litellm/charts/*
deploy/charts/*.tgz
litellm/proxy/vertex_key.json
**/.vim/
/node_modules
kub.yaml
loadtest_kub.yaml
litellm/proxy/_new_secret_config.yaml
litellm/proxy/_new_secret_config.yaml
litellm/proxy/_super_secret_config.yaml
litellm/proxy/_super_secret_config.yaml
litellm/proxy/myenv/bin/activate
litellm/proxy/myenv/bin/Activate.ps1
myenv/*
litellm/proxy/_experimental/out/404/index.html
litellm/proxy/_experimental/out/model_hub/index.html
litellm/proxy/_experimental/out/onboarding/index.html
litellm/tests/log.txt
litellm/tests/langfuse.log
litellm/tests/langfuse.log
litellm/proxy/google-cloud-sdk/*
tests/llm_translation/log.txt
.env

View file

@ -1,49 +0,0 @@
repos:
- repo: local
hooks:
- id: pyright
name: pyright
entry: pyright
language: system
types: [python]
files: ^litellm/
- id: isort
name: isort
entry: isort
language: system
types: [python]
files: litellm/.*\.py
exclude: ^litellm/__init__.py$
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 7.0.0 # The version of flake8 to use
hooks:
- id: flake8
exclude: ^litellm/tests/|^litellm/proxy/tests/
additional_dependencies: [flake8-print]
files: litellm/.*\.py
# - id: flake8
# name: flake8 (router.py function length)
# files: ^litellm/router\.py$
# args: [--max-function-length=40]
# # additional_dependencies: [flake8-functions]
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check
- repo: local
hooks:
- id: check-files-match
name: Check if files match
entry: python3 ci_cd/check_files_match.py
language: system
# - id: check-file-length
# name: Check file length
# entry: python check_file_length.py
# args: ["10000"] # set your desired maximum number of lines
# language: python
# files: litellm/.*\.py
# exclude: ^litellm/tests/

14
.readthedocs.yaml Normal file
View file

@ -0,0 +1,14 @@
# Read the Docs configuration file for MkDocs projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
mkdocs:
configuration: mkdocs.yml

View file

@ -1,76 +0,0 @@
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
# Builder stage
FROM $LITELLM_BUILD_IMAGE AS builder
# Set the working directory to /app
WORKDIR /app
# Install build dependencies
RUN apt-get clean && apt-get update && \
apt-get install -y gcc python3-dev && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \
pip install build
# Copy the current directory contents into the container at /app
COPY . .
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Build the package
RUN rm -rf dist/* && python -m build
# There should be only one wheel file now, assume the build only creates one
RUN ls -1 dist/*.whl | head -1
# Install the package
RUN pip install dist/*.whl
# install dependencies as wheels
RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
RUN pip install redisvl==0.0.7 --no-deps
# ensure pyjwt is used, not jwt
RUN pip uninstall jwt -y
RUN pip uninstall PyJWT -y
RUN pip install PyJWT==2.9.0 --no-cache-dir
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE AS runtime
# Update dependencies and clean up - handles debian security issue
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy the current directory contents into the container at /app
COPY . .
RUN ls -la /app
# Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
COPY --from=builder /app/dist/*.whl .
COPY --from=builder /wheels/ /wheels/
# Install the built wheel using pip; again using a wildcard if it's the only file
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
# Generate prisma client
RUN prisma generate
RUN chmod +x docker/entrypoint.sh
EXPOSE 4000/tcp
ENTRYPOINT ["litellm"]
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
CMD ["--port", "4000"]

View file

@ -1,8 +1,3 @@
Portions of this software are licensed as follows:
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
---
MIT License
Copyright (c) 2023 Berri AI

387
README.md
View file

@ -1,70 +1,33 @@
<h1 align="center">
🚅 LiteLLM
</h1>
<p align="center">
<p align="center">
<a href="https://render.com/deploy?repo=https://github.com/BerriAI/litellm" target="_blank" rel="nofollow"><img src="https://render.com/images/deploy-to-render-button.svg" alt="Deploy to Render"></a>
<a href="https://railway.app/template/HLP0Ub?referralCode=jch2ME">
<img src="https://railway.app/button.svg" alt="Deploy on Railway">
</a>
</p>
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
<br>
</p>
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
<h4 align="center">
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
</a>
<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
<img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
</a>
<a href="https://www.ycombinator.com/companies/berriai">
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
</a>
<a href="https://wa.link/huol9n">
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square" alt="Whatsapp">
</a>
<a href="https://discord.gg/wuPM9dRgDw">
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square" alt="Discord">
</a>
</h4>
# *🚅 litellm*
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.1-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![ New Relea Tests](https://github.com/BerriAI/litellm/actions/workflows/tests.yml/badge.svg)](https://github.com/BerriAI/litellm/actions/workflows/tests.yml)
[![Publish to PyPI](https://github.com/BerriAI/litellm/actions/workflows/publish_pypi.yml/badge.svg?branch=main)](https://github.com/BerriAI/litellm/actions/workflows/publish_pypi.yml) ![Downloads](https://img.shields.io/pypi/dm/litellm)
LiteLLM manages:
[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw)
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)
a simple & light 100 line package to call OpenAI, Azure, Cohere, Anthropic API Endpoints
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
litellm manages:
- translating inputs to completion and embedding endpoints
- guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']`
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
# usage
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
Read the docs - https://litellm.readthedocs.io/en/latest/
# Usage ([**Docs**](https://docs.litellm.ai/docs/))
> [!IMPORTANT]
> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
> LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
```shell
## quick start
```
pip install litellm
```
```python
from litellm import completion
import os
## set ENV variables
os.environ["OPENAI_API_KEY"] = "your-openai-key"
os.environ["COHERE_API_KEY"] = "your-cohere-key"
# ENV variables can be set in .env file, too. Example in .env.example
os.environ["OPENAI_API_KEY"] = "openai key"
os.environ["COHERE_API_KEY"] = "cohere key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
@ -72,304 +35,26 @@ messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion(model="command-nightly", messages=messages)
print(response)
response = completion("command-nightly", messages)
# azure openai call
response = completion("chatgpt-test", messages, azure=True)
# openrouter call
response = completion("google/palm-2-codechat-bison", messages)
```
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
Stable version
```
pip install litellm==0.1.1
```
Call any model supported by a provider, with `model=<provider_name>/<model_name>`. There might be provider-specific details here, so refer to [provider docs for more information](https://docs.litellm.ai/docs/providers)
# hosted version
- [Grab time if you want access 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
## Async ([Docs](https://docs.litellm.ai/docs/completion/stream#async-completion))
# why did I build this
- **Need for simplicity**: My code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
```python
from litellm import acompletion
import asyncio
async def test_get_response():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
response = await acompletion(model="gpt-3.5-turbo", messages=messages)
return response
response = asyncio.run(test_get_response())
print(response)
```
## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))
liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response.
Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
```python
from litellm import completion
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for part in response:
print(part.choices[0].delta.content or "")
# claude 2
response = completion('claude-2', messages, stream=True)
for part in response:
print(part.choices[0].delta.content or "")
```
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack, MLflow
```python
from litellm import completion
## set env variables for logging tools
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
Track spend + Load Balance across multiple projects
[Hosted Proxy (Preview)](https://docs.litellm.ai/docs/hosted)
The proxy provides:
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
4. [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)
## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/)
## Quick Start Proxy - CLI
```shell
pip install 'litellm[proxy]'
```
### Step 1: Start litellm proxy
```shell
$ litellm --model huggingface/bigcode/starcoder
#INFO: Proxy running on http://0.0.0.0:4000
```
### Step 2: Make ChatCompletions Request to Proxy
> [!IMPORTANT]
> 💡 [Use LiteLLM Proxy with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/proxy/user_keys)
```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
])
print(response)
```
## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))
Connect the proxy with a Postgres DB to create proxy keys
```bash
# Get the code
git clone https://github.com/BerriAI/litellm
# Go to folder
cd litellm
# Add the master key - you can change this after setup
echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
# Add the litellm salt key - you cannot change this after adding a model
# It is used to encrypt / decrypt your LLM API Key credentials
# We recommned - https://1password.com/password-generator/
# password generator to get a random hash for litellm salt key
echo 'LITELLM_SALT_KEY="sk-1234"' > .env
source .env
# Start
docker-compose up
```
UI on `/ui` on your proxy server
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
Set budgets and rate limits across multiple projects
`POST /key/generate`
### Request
```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
```
### Expected Response
```shell
{
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
"expires": "2023-11-19T01:38:25.838000+00:00" # datetime object
}
```
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [empower](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | ✅ |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | |
[**Read the Docs**](https://docs.litellm.ai/docs/)
## Contributing
To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
Here's how to modify the repo locally:
Step 1: Clone the repo
```
git clone https://github.com/BerriAI/litellm.git
```
Step 2: Navigate into the project, and install dependencies:
```
cd litellm
poetry install -E extra_proxy -E proxy
```
Step 3: Test your change:
```
cd litellm/tests # pwd: Documents/litellm/litellm/tests
poetry run flake8
poetry run pytest .
```
Step 4: Submit a PR with your changes! 🚀
- push your fork to your GitHub repo
- submit a PR from there
### Building LiteLLM Docker Image
Follow these instructions if you want to build / run the LiteLLM Docker Image yourself.
Step 1: Clone the repo
```
git clone https://github.com/BerriAI/litellm.git
```
Step 2: Build the Docker Image
Build using Dockerfile.non_root
```
docker build -f docker/Dockerfile.non_root -t litellm_test_image .
```
Step 3: Run the Docker Image
Make sure config.yaml is present in the root directory. This is your litellm proxy config file.
```
docker run \
-v $(pwd)/proxy_config.yaml:/app/config.yaml \
-e DATABASE_URL="postgresql://xxxxxxxx" \
-e LITELLM_MASTER_KEY="sk-1234" \
-p 4000:4000 \
litellm_test_image \
--config /app/config.yaml --detailed_debug
```
# Enterprise
For companies that need better security, user management and professional support
[Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
This covers:
- ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
- ✅ **Feature Prioritization**
- ✅ **Custom Integrations**
- ✅ **Professional Support - Dedicated discord + slack**
- ✅ **Custom SLAs**
- ✅ **Secure access with Single Sign-On**
# Support / talk with founders
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# Why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
# Contributors
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
<!-- prettier-ignore-start -->
<!-- markdownlint-disable -->
<!-- markdownlint-restore -->
<!-- prettier-ignore-end -->
<!-- ALL-CONTRIBUTORS-LIST:END -->
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
<img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
</a>
# Support
Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -0,0 +1,2 @@
__version__ = "1.0.0"
from .main import * # Import all the symbols from main.py

429
build/lib/litellm/main.py Normal file
View file

@ -0,0 +1,429 @@
import os, openai, cohere, replicate, sys
from typing import Any
from func_timeout import func_set_timeout, FunctionTimedOut
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import json
import traceback
import threading
import dotenv
import traceback
import subprocess
####### ENVIRONMENT VARIABLES ###################
# Loading env variables using dotenv
dotenv.load_dotenv()
set_verbose = False
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
'gpt-3.5-turbo',
'gpt-4'
]
open_ai_text_completion_models = [
'text-davinci-003'
]
cohere_models = [
'command-nightly',
]
anthropic_models = [
"claude-2",
"claude-instant-1"
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
#############################################
####### COMPLETION ENDPOINTS ################
#############################################
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
try:
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
engine=model,
messages = messages
)
elif "replicate" in model:
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
# checking in case user set it to REPLICATE_API_KEY instead
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
prompt = " ".join([message["content"] for message in messages])
input = [{"prompt": prompt}]
if max_tokens:
input["max_length"] = max_tokens # for t5 models
input["max_new_tokens"] = max_tokens # for llama2 models
## LOGGING
logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
## COMPLETION CALL
output = replicate.run(
model,
input=input)
response = ""
for item in output:
response += item
new_response = {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": response,
"role": "assistant"
}
}
]
}
response = new_response
elif model in anthropic_models:
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
prompt = f"{HUMAN_PROMPT}"
for message in messages:
if "role" in message:
if message["role"] == "user":
prompt += f"{HUMAN_PROMPT}{message['content']}"
else:
prompt += f"{AI_PROMPT}{message['content']}"
else:
prompt += f"{HUMAN_PROMPT}{message['content']}"
prompt += f"{AI_PROMPT}"
anthropic = Anthropic()
if max_tokens:
max_tokens_to_sample = max_tokens
else:
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
## LOGGING
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
## COMPLETION CALL
completion = anthropic.completions.create(
model=model,
prompt=prompt,
max_tokens_to_sample=max_tokens_to_sample
)
new_response = {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": completion.completion,
"role": "assistant"
}
}
]
}
print(f"new response: {new_response}")
response = new_response
elif model in cohere_models:
cohere_key = os.environ.get("COHERE_API_KEY")
co = cohere.Client(cohere_key)
prompt = " ".join([message["content"] for message in messages])
## LOGGING
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = co.generate(
model=model,
prompt = prompt
)
new_response = {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": response[0],
"role": "assistant"
}
}
],
}
response = new_response
elif model in open_ai_chat_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
model=model,
messages = messages
)
elif model in open_ai_text_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
prompt = " ".join([message["content"] for message in messages])
## LOGGING
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.Completion.create(
model=model,
prompt = prompt
)
else:
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
return response
except Exception as e:
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
raise e
### EMBEDDING ENDPOINTS ####################
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
response = None
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, engine=model)
print_verbose(f"response_value: {str(response)[:50]}")
elif model in open_ai_embedding_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, model=model)
print_verbose(f"response_value: {str(response)[:50]}")
else:
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
return response
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
class litellm_client:
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
set_verbose = verbose
self.success_callback = success_callback
self.failure_callback = failure_callback
self.logger_fn = None # if user passes in their own logging function
self.callback_list = list(set(self.success_callback + self.failure_callback))
self.set_callbacks()
## COMPLETION CALL
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
## EMBEDDING CALL
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
def set_callbacks(self): #instantiate any external packages
for callback in self.callback_list: # only install what's required
if callback == "sentry":
try:
import sentry_sdk
except ImportError:
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
self.sentry_sdk = sentry_sdk
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
self.capture_exception = self.sentry_sdk.capture_exception
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
elif callback == "posthog":
try:
from posthog import Posthog
except:
print_verbose("Package 'posthog' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
from posthog import Posthog
self.posthog = Posthog(
project_api_key=os.environ.get("POSTHOG_API_KEY"),
host=os.environ.get("POSTHOG_API_URL"))
elif callback == "slack":
try:
from slack_bolt import App
except ImportError:
print_verbose("Package 'slack_bolt' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
from slack_bolt import App
self.slack_app = App(
token=os.environ.get("SLACK_API_TOKEN"),
signing_secret=os.environ.get("SLACK_API_SECRET")
)
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
def handle_input(self, model_call_details={}):
if len(model_call_details.keys()) > 0:
model = model_call_details["model"] if "model" in model_call_details else None
if model:
for callback in self.callback_list:
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
self.add_breadcrumb(
category=f'{model}',
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
level='info',
)
if self.logger_fn and callable(self.logger_fn):
self.logger_fn(model_call_details)
pass
def handle_success(self, model, messages, additional_details):
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["litellm_model"] = str(model)
additional_details["litellm_messages"] = str(messages)
for callback in self.success_callback:
try:
if callback == "posthog":
ph_obj = {}
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
pass
elif callback == "slack":
slack_msg = ""
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Successful call"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
except:
pass
if success_handler and callable(success_handler):
call_details = {
"model": model,
"messages": messages,
"additional_details": additional_details
}
success_handler(call_details)
pass
def handle_failure(self, exception, args):
args.pop("self")
additional_details = args.pop("additional_details", {})
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
for callback in self.failure_callback:
try:
if callback == "slack":
slack_msg = ""
for param in args:
slack_msg += f"{param}: {args[param]}\n"
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Traceback: {traceback.format_exc()}"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
elif callback == "sentry":
self.capture_exception(exception)
elif callback == "posthog":
if len(additional_details.keys()) > 0:
ph_obj = {}
for param in args:
ph_obj[param] += args[param]
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
else:
pass
except:
print(f"got an error calling {callback} - {traceback.format_exc()}")
if failure_handler and callable(failure_handler):
call_details = {
"exception": exception,
"additional_details": additional_details
}
failure_handler(call_details)
pass
####### HELPER FUNCTIONS ################
#Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
try:
model_call_details = {}
model_call_details["model"] = model
model_call_details["input"] = input
model_call_details["azure"] = azure
model_call_details["additional_args"] = additional_args
if logger_fn and callable(logger_fn):
try:
# log additional call details -> api key, etc.
if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models:
model_call_details["api_type"] = openai.api_type
model_call_details["api_base"] = openai.api_base
model_call_details["api_version"] = openai.api_version
model_call_details["api_key"] = openai.api_key
elif "replicate" in model:
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
elif model in anthropic_models:
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
elif model in cohere_models:
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
except:
print_verbose(f"Basic model call details: {model_call_details}")
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
pass
else:
print_verbose(f"Basic model call details: {model_call_details}")
pass
except:
pass
## Set verbose to true -> ```litellm.verbose = True```
def print_verbose(print_statement):
if set_verbose:
print(f"LiteLLM: {print_statement}")
print("Get help - https://discord.com/invite/wuPM9dRgDw")

View file

@ -1,28 +0,0 @@
import sys
def check_file_length(max_lines, filenames):
bad_files = []
for filename in filenames:
with open(filename, "r") as file:
lines = file.readlines()
if len(lines) > max_lines:
bad_files.append((filename, len(lines)))
return bad_files
if __name__ == "__main__":
max_lines = int(sys.argv[1])
filenames = sys.argv[2:]
bad_files = check_file_length(max_lines, filenames)
if bad_files:
bad_files.sort(
key=lambda x: x[1], reverse=True
) # Sort files by length in descending order
for filename, length in bad_files:
print(f"{filename}: {length} lines")
sys.exit(1)
else:
sys.exit(0)

View file

@ -1,32 +0,0 @@
import sys
import filecmp
import shutil
def main(argv=None):
print(
"Comparing model_prices_and_context_window and litellm/model_prices_and_context_window_backup.json files... checking if they match."
)
file1 = "model_prices_and_context_window.json"
file2 = "litellm/model_prices_and_context_window_backup.json"
cmp_result = filecmp.cmp(file1, file2, shallow=False)
if cmp_result:
print(f"Passed! Files {file1} and {file2} match.")
return 0
else:
print(
f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}."
)
copy_content(file1, file2)
return 1
def copy_content(source, destination):
shutil.copy2(source, destination)
if __name__ == "__main__":
sys.exit(main())

View file

@ -1,32 +0,0 @@
component_management:
individual_components:
- component_id: "Router"
paths:
- "router"
- component_id: "LLMs"
paths:
- "*/llms/*"
- component_id: "Caching"
paths:
- "*/caching/*"
- ".*redis.*"
- component_id: "litellm_logging"
paths:
- "*/integrations/*"
- ".*litellm_logging.*"
- component_id: "Proxy_Authentication"
paths:
- "*/proxy/auth/**"
comment:
layout: "header, diff, flags, components" # show component info in the PR comment
coverage:
status:
project:
default:
target: auto
threshold: 1% # at maximum allow project coverage to drop by 1%
patch:
default:
target: auto
threshold: 0% # patch coverage should be 100%

File diff suppressed because one or more lines are too long

View file

@ -1,406 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZwuaylskLxFu",
"outputId": "d684d6a3-32fe-4beb-c378-c39134bcf8cc"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting litellm==0.1.363\n",
" Downloading litellm-0.1.363-py3-none-any.whl (34 kB)\n",
"Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.27.8)\n",
"Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (1.0.0)\n",
"Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.4.0)\n",
"Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (2.31.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.65.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.8.5)\n",
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.363) (2022.10.31)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.2.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (2023.7.22)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (23.1.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (6.0.4)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.9.2)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.4.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.3.1)\n",
"Installing collected packages: litellm\n",
" Attempting uninstall: litellm\n",
" Found existing installation: litellm 0.1.362\n",
" Uninstalling litellm-0.1.362:\n",
" Successfully uninstalled litellm-0.1.362\n",
"Successfully installed litellm-0.1.363\n"
]
}
],
"source": [
"!pip install litellm==\"0.1.363\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "W216G__XL19Q"
},
"outputs": [],
"source": [
"# @title Import litellm & Set env variables\n",
"import litellm\n",
"import os\n",
"\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = \" \" #@param"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ff1lKwUMMLJj",
"outputId": "bfddf6f8-36d4-45e5-92dc-349083fa41b8"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
" Result from claude-instant-1 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" The Los Angeles Dodgers won the 2020 World Series, defeating the Tampa Bay Rays 4-2. It was the Dodgers' first World Series title since 1988.\"}}], 'created': 1691536677.2676156, 'model': 'claude-instant-1', 'usage': {'prompt_tokens': 30, 'completion_tokens': 32, 'total_tokens': 62}}\n",
"\n",
"\n",
" Result from claude-2 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ' The Los Angeles Dodgers won'}}], 'created': 1691536677.944753, 'model': 'claude-2', 'usage': {'prompt_tokens': 30, 'completion_tokens': 5, 'total_tokens': 35}}\n"
]
}
],
"source": [
"# @title Request Claude Instant-1 and Claude-2\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
" {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"}\n",
" ]\n",
"\n",
"result = litellm.completion('claude-instant-1', messages)\n",
"print(\"\\n\\n Result from claude-instant-1\", result)\n",
"result = litellm.completion('claude-2', messages, max_tokens=5, temperature=0.2)\n",
"print(\"\\n\\n Result from claude-2\", result)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "06hWKnNQMrV-",
"outputId": "7fdec0eb-d4a9-4882-f9c4-987ff9a31114"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Here\n",
"'s\n",
" a\n",
" quick\n",
" overview\n",
" of\n",
" how\n",
" a\n",
" court\n",
" case\n",
" can\n",
" reach\n",
" the\n",
" U\n",
".\n",
"S\n",
".\n",
" Supreme\n",
" Court\n",
":\n",
"\n",
"\n",
"-\n",
" The\n",
" case\n",
" must\n",
" first\n",
" be\n",
" heard\n",
" in\n",
" a\n",
" lower\n",
" trial\n",
" court\n",
" (\n",
"either\n",
" a\n",
" state\n",
" court\n",
" or\n",
" federal\n",
" district\n",
" court\n",
").\n",
" The\n",
" trial\n",
" court\n",
" makes\n",
" initial\n",
" r\n",
"ulings\n",
" and\n",
" produces\n",
" a\n",
" record\n",
" of\n",
" the\n",
" case\n",
".\n",
"\n",
"\n",
"-\n",
" The\n",
" losing\n",
" party\n",
" can\n",
" appeal\n",
" the\n",
" decision\n",
" to\n",
" an\n",
" appeals\n",
" court\n",
" (\n",
"a\n",
" state\n",
" appeals\n",
" court\n",
" for\n",
" state\n",
" cases\n",
",\n",
" or\n",
" a\n",
" federal\n",
" circuit\n",
" court\n",
" for\n",
" federal\n",
" cases\n",
").\n",
" The\n",
" appeals\n",
" court\n",
" reviews\n",
" the\n",
" trial\n",
" court\n",
"'s\n",
" r\n",
"ulings\n",
" and\n",
" can\n",
" affirm\n",
",\n",
" reverse\n",
",\n",
" or\n",
" modify\n",
" the\n",
" decision\n",
".\n",
"\n",
"\n",
"-\n",
" If\n",
" a\n",
" party\n",
" is\n",
" still\n",
" unsat\n",
"isf\n",
"ied\n",
" after\n",
" the\n",
" appeals\n",
" court\n",
" rules\n",
",\n",
" they\n",
" can\n",
" petition\n",
" the\n",
" Supreme\n",
" Court\n",
" to\n",
" hear\n",
" the\n",
" case\n",
" through\n",
" a\n",
" writ\n",
" of\n",
" cert\n",
"ior\n",
"ari\n",
".\n",
" \n",
"\n",
"\n",
"-\n",
" The\n",
" Supreme\n",
" Court\n",
" gets\n",
" thousands\n",
" of\n",
" cert\n",
" petitions\n",
" every\n",
" year\n",
" but\n",
" usually\n",
" only\n",
" agrees\n",
" to\n",
" hear\n",
" about\n",
" 100\n",
"-\n",
"150\n",
" of\n",
" cases\n",
" that\n",
" have\n",
" significant\n",
" national\n",
" importance\n",
" or\n",
" where\n",
" lower\n",
" courts\n",
" disagree\n",
" on\n",
" federal\n",
" law\n",
".\n",
" \n",
"\n",
"\n",
"-\n",
" If\n",
" 4\n",
" out\n",
" of\n",
" the\n",
" 9\n",
" Just\n",
"ices\n",
" vote\n",
" to\n",
" grant\n",
" cert\n",
" (\n",
"agree\n",
" to\n",
" hear\n",
" the\n",
" case\n",
"),\n",
" it\n",
" goes\n",
" on\n",
" the\n",
" Supreme\n",
" Court\n",
"'s\n",
" do\n",
"cket\n",
" for\n",
" arguments\n",
".\n",
"\n",
"\n",
"-\n",
" The\n",
" Supreme\n",
" Court\n",
" then\n",
" hears\n",
" oral\n",
" arguments\n",
",\n",
" considers\n",
" written\n",
" brief\n",
"s\n",
",\n",
" examines\n",
" the\n",
" lower\n",
" court\n",
" records\n",
",\n",
" and\n",
" issues\n",
" a\n",
" final\n",
" ruling\n",
" on\n",
" the\n",
" case\n",
",\n",
" which\n",
" serves\n",
" as\n",
" binding\n",
" precedent\n"
]
}
],
"source": [
"# @title Streaming Example: Request Claude-2\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
" {\"role\": \"user\", \"content\": \"how does a court case get to the Supreme Court?\"}\n",
" ]\n",
"\n",
"result = litellm.completion('claude-2', messages, stream=True)\n",
"for part in result:\n",
" print(part.choices[0].delta.content or \"\")\n",
"\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View file

@ -1,423 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "BmX0b5Ueh91v"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"import os, litellm"
],
"metadata": {
"id": "mnveHO5dfcB0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": {
"id": "eo88QUdbiDIE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": {
"id": "dQMkM-diiKdE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
],
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": {
"id": "4xrOPnt-oqwm"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
],
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
],
"metadata": {
"id": "lx8DbMBqoAoN"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
],
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "yB2NDOO4oxrp"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
]
}
]
}

View file

@ -1,310 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "fNkMBurtxawJ"
},
"source": [
"# LiteLLM Bedrock Usage\n",
"Important Note: For Bedrock Requests you need to ensure you have `pip install boto3>=1.28.57`, boto3 supports bedrock from `boto3>=1.28.57` and higher "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "htAufI28xeSy"
},
"source": [
"## Pre-Requisites"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jT5GbPjAuDTp"
},
"outputs": [],
"source": [
"!pip install litellm\n",
"!pip install boto3>=1.28.57 # this version onwards has bedrock support"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "H4Vu4er2xnfI"
},
"source": [
"## Set Bedrock/AWS Credentials"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "CtTrBthWxp-t"
},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"AWS_ACCESS_KEY_ID\"] = \"\" # Access key\n",
"os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"\" # Secret access key\n",
"os.environ[\"AWS_REGION_NAME\"] = \"\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "ycRK9NUdx1EI"
},
"source": [
"## Anthropic Requests"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tgkuoHa5uLOy",
"outputId": "27a78e86-c6a7-4bcc-8559-0813cb978426"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Claude instant 1, response\n",
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm doing well, thanks for asking!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-4f2e64a1-56d2-43f2-90d3-60ffd6f5086d\",\n",
" \"created\": 1696256761.3265705,\n",
" \"model\": \"anthropic.claude-instant-v1\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 9,\n",
" \"total_tokens\": 20\n",
" },\n",
" \"finish_reason\": \"stop_sequence\"\n",
"}\n",
"Claude v2, response\n",
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm doing well, thanks for asking!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-34f59b33-f94e-40c2-8bdb-f4af0813405e\",\n",
" \"created\": 1696256762.2137017,\n",
" \"model\": \"anthropic.claude-v2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 9,\n",
" \"total_tokens\": 20\n",
" },\n",
" \"finish_reason\": \"stop_sequence\"\n",
"}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"bedrock/anthropic.claude-instant-v1\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Claude instant 1, response\")\n",
"print(response)\n",
"\n",
"\n",
"response = completion(\n",
" model=\"bedrock/anthropic.claude-v2\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Claude v2, response\")\n",
"print(response)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "HnM-HtM3yFMT"
},
"source": [
"## Anthropic Requests - With Streaming"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_JZvg2yovRsU"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"bedrock/anthropic.claude-instant-v1\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True,\n",
")\n",
"print(\"Claude instant 1, response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"\n",
"response = completion(\n",
" model=\"bedrock/anthropic.claude-v2\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Claude v2, response\")\n",
"print(response)\n",
"for chunk in response:\n",
" print(chunk)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "zj1U1mh9zEhP"
},
"source": [
"## A121 Requests"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6wK6MZLovU7r",
"outputId": "4cf80c04-f15d-4066-b4c7-113b551538de"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"J2 ultra response\n",
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\nHi, I'm doing well, thanks for asking! How about you?\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-f2de678f-0e70-4e36-a01f-8b184c2e4d50\",\n",
" \"created\": 1696257116.044311,\n",
" \"model\": \"ai21.j2-ultra\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 16,\n",
" \"total_tokens\": 22\n",
" }\n",
"}\n",
"J2 mid response\n",
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\nGood. And you?\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-420d6bf9-36d8-484b-93b4-4c9e00f7ce2e\",\n",
" \"created\": 1696257116.5756805,\n",
" \"model\": \"ai21.j2-mid\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 6,\n",
" \"total_tokens\": 12\n",
" }\n",
"}\n"
]
}
],
"source": [
"response = completion(\n",
" model=\"bedrock/ai21.j2-ultra\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
")\n",
"print(\"J2 ultra response\")\n",
"print(response)\n",
"\n",
"response = completion(\n",
" model=\"bedrock/ai21.j2-mid\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
")\n",
"print(\"J2 mid response\")\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y5gGZIwzzSON"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View file

@ -1,241 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Use LiteLLM to calculate costs for all your completion calls\n",
"In this notebook we'll use `litellm.completion_cost` to get completion costs"
],
"metadata": {
"id": "BgWr0PsUR3vV"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ViczFTjsDzSI"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.549 # use 0.1.549 or later"
]
},
{
"cell_type": "markdown",
"source": [
"## Calculating costs for gpt-3.5 turbo completion()"
],
"metadata": {
"id": "k_1CWUwmSNtj"
}
},
{
"cell_type": "code",
"source": [
"from litellm import completion, completion_cost\n",
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"\n",
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
"response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=messages,\n",
")\n",
"\n",
"print(response)\n",
"\n",
"cost = completion_cost(completion_response=response)\n",
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
"print(formatted_string)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Tp0fyk-jD0pP",
"outputId": "ce885fb3-3237-41b2-9d8b-3fb30bba498b"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"got response\n",
"{\n",
" \"id\": \"chatcmpl-7vyCApIZaCxP36kb9meUMN2DFSJPh\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694050442,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI and I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 28,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n",
"Cost for completion call: $0.0000755000\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calculating costs for Together Computer completion()"
],
"metadata": {
"id": "AjDs4G-uS6PS"
}
},
{
"cell_type": "code",
"source": [
"from litellm import completion, completion_cost\n",
"import os\n",
"os.environ['TOGETHERAI_API_KEY'] = \"\"\n",
"\n",
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
"response = completion(\n",
" model=\"togethercomputer/llama-2-70b-chat\",\n",
" messages=messages,\n",
")\n",
"\n",
"print(response)\n",
"\n",
"cost = completion_cost(completion_response=response)\n",
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
"print(formatted_string)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jMPsUV-KEa6a",
"outputId": "7a69b291-f149-4b9c-8a78-9c8142bac759"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"Hello! I'm doing well, thanks for asking. I hope you're having a great\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694050771.2821715,\n",
" \"model\": \"togethercomputer/llama-2-70b-chat\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 12,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 30\n",
" }\n",
"}\n",
"Cost for completion call: $0.0000900000\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calculating costs for Replicate Llama2 completion()"
],
"metadata": {
"id": "vEa4s6-7TANS"
}
},
{
"cell_type": "code",
"source": [
"from litellm import completion, completion_cost\n",
"import os\n",
"os.environ['REPLICATE_API_KEY'] = \"\"\n",
"\n",
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
"response = completion(\n",
" model=\"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n",
" messages=messages,\n",
")\n",
"\n",
"print(response)\n",
"\n",
"cost = completion_cost(completion_response=response)\n",
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
"print(formatted_string)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Xf1TKRDuS1bR",
"outputId": "cfb2b484-a6e5-41ad-86c5-7e66aba27648"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Hello! I'm doing well, thanks for asking. How about you? Is there anything you need help with today?\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694050893.4534576,\n",
" \"model\": \"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 24,\n",
" \"total_tokens\": 30\n",
" },\n",
" \"ended\": 1694050896.6689413\n",
"}\n",
"total_replicate_run_time 3.2154836654663086\n",
"Cost for completion call: $0.0045016771\n"
]
}
]
}
]
}

View file

@ -1,272 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "9dKM5k8qsMIj"
},
"source": [
"## LiteLLM HuggingFace\n",
"Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BVDdmCp-o97j"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yp5UXRqtpu9f"
},
"source": [
"## Hugging Face Free Serverless Inference API\n",
"Read more about the Free Serverless Inference API here: https://huggingface.co/docs/api-inference.\n",
"\n",
"In order to use litellm to call Serverless Inference API:\n",
"\n",
"* Browse Serverless Inference compatible models here: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation.\n",
"* Copy the model name from hugging face\n",
"* Set `model = \"huggingface/<model-name>\"`\n",
"\n",
"Example set `model=huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct` to call `meta-llama/Meta-Llama-3.1-8B-Instruct`\n",
"\n",
"https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Pi5Oww8gpCUm",
"outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ModelResponse(id='chatcmpl-c54dfb68-1491-4d68-a4dc-35e603ea718a', choices=[Choices(finish_reason='eos_token', index=0, message=Message(content=\"I'm just a computer program, so I don't have feelings, but thank you for asking! How can I assist you today?\", role='assistant', tool_calls=None, function_call=None))], created=1724858285, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=47, total_tokens=74))\n",
"ModelResponse(id='chatcmpl-d2ae38e6-4974-431c-bb9b-3fa3f95e5a6d', choices=[Choices(finish_reason='length', index=0, message=Message(content=\"\\n\\nIm doing well, thank you. Ive been keeping busy with work and some personal projects. How about you?\\n\\nI'm doing well, thank you. I've been enjoying some time off and catching up on some reading. How can I assist you today?\\n\\nI'm looking for a good book to read. Do you have any recommendations?\\n\\nOf course! Here are a few book recommendations across different genres:\\n\\n1.\", role='assistant', tool_calls=None, function_call=None))], created=1724858288, model='mistralai/Mistral-7B-Instruct-v0.3', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=85, prompt_tokens=6, total_tokens=91))\n"
]
}
],
"source": [
"import os\n",
"import litellm\n",
"\n",
"# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"\n",
"# Call https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"response = litellm.completion(\n",
" model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(response)\n",
"\n",
"\n",
"# Call https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3\n",
"response = litellm.completion(\n",
" model=\"huggingface/mistralai/Mistral-7B-Instruct-v0.3\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-klhAhjLtclv"
},
"source": [
"## Hugging Face Dedicated Inference Endpoints\n",
"\n",
"Steps to use\n",
"* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
"* Set `api_base` to your deployed api base\n",
"* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lbmw8Gl_pHns",
"outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"length\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": -8.9481967812\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n",
" \"created\": 1695871068.8413374,\n",
" \"model\": \"glaiveai/glaive-coder-7b\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 24\n",
" }\n",
"}\n"
]
}
],
"source": [
"import os\n",
"import litellm\n",
"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"\n",
"# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"# set api base to your deployed api endpoint from hugging face\n",
"response = litellm.completion(\n",
" model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EU0UubrKzTFe"
},
"source": [
"## HuggingFace - Streaming (Serveless or Dedicated)\n",
"Set stream = True"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y-QfIvA-uJKX",
"outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<litellm.utils.CustomStreamWrapper object at 0x1278471d0>\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='I', role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'m\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' just', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' a', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' computer', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' program', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' so', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' don', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'t\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' have', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' feelings', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' but', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' thank', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' for', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' asking', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='!', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' How', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' can', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' assist', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' today', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='?', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='<|eot_id|>', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n"
]
}
],
"source": [
"import os\n",
"import litellm\n",
"\n",
"# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"\n",
"# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"# set api base to your deployed api endpoint from hugging face\n",
"response = litellm.completion(\n",
" model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"\n",
"print(response)\n",
"\n",
"for chunk in response:\n",
" print(chunk)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CKXAnK55zQRl"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View file

@ -1,179 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM OpenRouter Cookbook"
],
"metadata": {
"id": "iFEmsVJI_2BR"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cBlUhCEP_xj4"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"os.environ['OPENROUTER_API_KEY'] = \"\""
],
"metadata": {
"id": "p-MQqWOT_1a7"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from litellm import completion\n",
"response = completion(\n",
" model=\"openrouter/google/palm-2-chat-bison\",\n",
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ze8JqMqWAARO",
"outputId": "64f3e836-69fa-4f8e-fb35-088a913bbe98"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject id=gen-W8FTMSIEorCp3vG5iYIgNMR4IeBv at 0x7c3dcef1f060> JSON: {\n",
" \"id\": \"gen-W8FTMSIEorCp3vG5iYIgNMR4IeBv\",\n",
" \"model\": \"chat-bison@001\",\n",
" \"choices\": [\n",
" {\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"```\\n#include <stdio.h>\\n\\nint main() {\\n printf(\\\"Hi!\\\\n\\\");\\n return 0;\\n}\\n```\"\n",
" }\n",
" }\n",
" ],\n",
" \"response_ms\": 7817.777999999999\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"response = completion(\n",
" model=\"openrouter/anthropic/claude-2\",\n",
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-LnhELrnAM_J",
"outputId": "d51c7ab7-d761-4bd1-f849-1534d9df4cd0"
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject id=gen-IiuV7ZNimDufVeutBHrl8ajPuzEh at 0x7c3dcea67560> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \" Here is some simple code to print \\\"Hi\\\":\\n\\n```python\\nprint(\\\"Hi\\\")\\n```\\n\\nThis uses the print() function in Python to output the text \\\"Hi\\\".\"\n",
" },\n",
" \"finish_reason\": \"stop_sequence\"\n",
" }\n",
" ],\n",
" \"model\": \"claude-2.0\",\n",
" \"id\": \"gen-IiuV7ZNimDufVeutBHrl8ajPuzEh\",\n",
" \"response_ms\": 8112.443000000001\n",
"}"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"response = completion(\n",
" model=\"openrouter/meta-llama/llama-2-70b-chat\",\n",
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dJBOUYdwCEn1",
"outputId": "ffa18679-ec15-4dad-fe2b-68665cdf36b0"
},
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject id=gen-PyMd3yyJ0aQsCgIY9R8XGZoAtPbl at 0x7c3dceefcae0> JSON: {\n",
" \"id\": \"gen-PyMd3yyJ0aQsCgIY9R8XGZoAtPbl\",\n",
" \"model\": \"togethercomputer/llama-2-70b-chat\",\n",
" \"choices\": [\n",
" {\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"*gives a sly smile as they type*\\n\\nHey there, handsome. \\ud83d\\ude0f\\n\\nWhat brings you to my neck of the woods today? \\ud83d\\ude18\"\n",
" }\n",
" }\n",
" ],\n",
" \"response_ms\": 9618.775\n",
"}"
]
},
"metadata": {},
"execution_count": 13
}
]
}
]
}

View file

@ -1,568 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "dwGtLi_tvM6N"
},
"source": [
"# Using LiteLLM with Petals"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bdlgaWQqDpzj"
},
"outputs": [],
"source": [
"!pip install litellm # 0.1.715 and upwards"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5Id2QKwOEH8X"
},
"outputs": [],
"source": [
"# install petals\n",
"!pip install git+https://github.com/bigscience-workshop/petals"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "k42fldw3veSN"
},
"source": [
"## petals-team/StableBeluga2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tIHcEHdSDqju",
"outputId": "485dbf54-395c-433a-bbf4-8eb70a9fa624"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
"Sep 19 18:39:50.634 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
"Sep 19 18:39:50.639 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: StableBeluga2-hf\n",
"Sep 19 18:40:13.920 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:40 via …HfQWVM => 40:80 via …Zj98Se\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"Hello, how are you?\\nI'm doing well, thank you. I'm just getting ready to go to the gym.\\nOh, that's great. I'm trying to get back into a workout routine myself.\\nYeah,\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-f09d79b3-c1d1-49b7-b55f-cd8dfa1043bf\",\n",
" \"created\": 1695148897.473613,\n",
" \"model\": \"petals-team/StableBeluga2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 45,\n",
" \"total_tokens\": 51\n",
" }\n",
"}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(model=\"petals/petals-team/StableBeluga2\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], max_tokens=50)\n",
"\n",
"print(response)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "J8DubRnHvh_j"
},
"source": [
"## huggyllama/llama-65b"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 538,
"referenced_widgets": [
"2fec5cc400424671a3d517327117d18a",
"3687c76fe84d464baaf35366b21e83b3",
"c29d4460dbaa441cae110b58e0014151",
"6560449a38bf4a7bacd97ccaacf01c4c",
"5fbd6ae281984d28ba59ebfd0279eda7",
"323e30e275434aeea241163e5f1f9031",
"48f4adec51c94f9da6e4c4564daeff84",
"2a672981a44b4a7fb30674f97f4c10c6",
"d75ae8d22ea74840b4c80c8f386384c4",
"54c06312ecff4e7588665e8b0cb7118b",
"300078a9d1a6483fba81a4be63793ff7"
]
},
"id": "IlTCJwDsNvgF",
"outputId": "2e84d125-d982-48ed-8a92-6ca438a50d0c"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sep 19 18:41:37.912 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
"Sep 19 18:41:37.914 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: llama-65b-hf\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2fec5cc400424671a3d517327117d18a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.2` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
" warnings.warn(\n",
"Sep 19 18:41:48.396 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:80 via …g634yJ\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"Hello, how are you?\\nI'm fine, thank you. And\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-3496e6eb-2a27-4f94-8d75-70648eacd88f\",\n",
" \"created\": 1695148912.9116046,\n",
" \"model\": \"huggyllama/llama-65b\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 14,\n",
" \"total_tokens\": 20\n",
" }\n",
"}\n"
]
}
],
"source": [
"response = completion(model=\"petals/huggyllama/llama-65b\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], temperature=0.2, max_tokens=10)\n",
"\n",
"print(response)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"2a672981a44b4a7fb30674f97f4c10c6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2fec5cc400424671a3d517327117d18a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_3687c76fe84d464baaf35366b21e83b3",
"IPY_MODEL_c29d4460dbaa441cae110b58e0014151",
"IPY_MODEL_6560449a38bf4a7bacd97ccaacf01c4c"
],
"layout": "IPY_MODEL_5fbd6ae281984d28ba59ebfd0279eda7"
}
},
"300078a9d1a6483fba81a4be63793ff7": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"323e30e275434aeea241163e5f1f9031": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3687c76fe84d464baaf35366b21e83b3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_323e30e275434aeea241163e5f1f9031",
"placeholder": "",
"style": "IPY_MODEL_48f4adec51c94f9da6e4c4564daeff84",
"value": "Loading checkpoint shards: 100%"
}
},
"48f4adec51c94f9da6e4c4564daeff84": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"54c06312ecff4e7588665e8b0cb7118b": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5fbd6ae281984d28ba59ebfd0279eda7": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6560449a38bf4a7bacd97ccaacf01c4c": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_54c06312ecff4e7588665e8b0cb7118b",
"placeholder": "",
"style": "IPY_MODEL_300078a9d1a6483fba81a4be63793ff7",
"value": " 2/2 [00:00&lt;00:00, 2.36it/s]"
}
},
"c29d4460dbaa441cae110b58e0014151": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2a672981a44b4a7fb30674f97f4c10c6",
"max": 2,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d75ae8d22ea74840b4c80c8f386384c4",
"value": 2
}
},
"d75ae8d22ea74840b4c80c8f386384c4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View file

@ -1,224 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "JRCXfhACct4Y"
},
"source": [
"## User Based Rate Limiting Using LiteLLM\n",
"- LiteLLM allows you to set budgets per user\n",
"- Check if a given user has cross their allocated budget\n",
"\n",
"In this notebook we create a $0.0002 daily budget per user and make completion calls using the litellm budget manager"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fl1kcLG8aaIV"
},
"outputs": [],
"source": [
"!pip install litellm uuid"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "zqRrpoQ3c6oQ"
},
"source": [
"## Imports & Env variables"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "CSkz8bmwdD3w"
},
"outputs": [],
"source": [
"import uuid\n",
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "ktqe3gSmdFQ4"
},
"source": [
"## completion() with the budget manager\n",
"\n",
"This code does the following\n",
"- Initializes a litellm.BudgetManager()\n",
"- Checks if a budget exists for a user\n",
" - Creates a $0.0002 budget if the user does not exisr\n",
"- Makes a `litellm.completion()` request only if the user is under their budget"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pUN48YvmaiRU",
"outputId": "082d6a8b-9aef-4794-9eac-7ba9823ea373"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No budget exists for user: 29af95f8-c3c6-4c8c-b080-8b2d18d25432\n",
"\n",
"Creating a budget for user: 29af95f8-c3c6-4c8c-b080-8b2d18d25432, daily budget $0.0002\n",
"\n",
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0, budget for user: $0.0002\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yAUkHQV8xdfldzzZnnnuVU8pl31b\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694574378,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have emotions, but I'm here to assist you. How can I help you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 14,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
},
{
"data": {
"text/plain": [
"{'status': 'success'}"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from litellm import BudgetManager, completion\n",
"\n",
"# Initializes a litellm.BudgetManager()\n",
"budget_manager = BudgetManager(project_name=\"liteLLM_project\", client_type=\"hosted\") # see https://docs.litellm.ai/docs/budget_manager\n",
"\n",
"user_id = str(uuid.uuid4()) # create a new user id\n",
"daily_budget = 0.0002\n",
"\n",
"# Checks if a budget exists for a user\n",
"if not budget_manager.is_valid_user(user_id):\n",
" # Creates a $0.0002 budget if the user does not exisr\n",
" print(f\"No budget exists for user: {user_id}\\n\")\n",
" print(f\"Creating a budget for user: {user_id}, daily budget ${daily_budget}\\n\")\n",
" budget_manager.create_budget(total_budget=daily_budget, user=user_id, duration=\"daily\") # duration can be daily, weekly, monthly\n",
"\n",
"\n",
"# Makes a `litellm.completion()` request only if the user is under their budget\n",
"current_spend_for_user = budget_manager.get_current_cost(user=user_id)\n",
"budget_for_user = budget_manager.get_total_budget(user_id)\n",
"print(f\"User: {user_id} has spent ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
"\n",
"if current_spend_for_user <= budget_for_user:\n",
" response = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": \"Hey, how's it going?\"}])\n",
" budget_manager.update_cost(completion_obj=response, user=user_id)\n",
"else:\n",
" response = \"Sorry - no budget!\"\n",
"\n",
"print(response)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "yMOirNoBfmmc"
},
"source": [
"## Make 10 calls to cross the budget per user\n",
"- Code fails after user crossed their budget"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "apKF3H-xbFXc",
"outputId": "1c6ef0fe-e27e-4ead-adc6-2c7eb0214e44"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $7.9e-05, budget for user: $0.0002\n",
"\n",
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0.00015999999999999999, budget for user: $0.0002\n",
"\n",
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0.00023899999999999998, budget for user: $0.0002\n",
"\n",
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has exceeded budget, current spend $0.00023899999999999998, budget for user: $0.0002\n",
"\n"
]
}
],
"source": [
"user_id = \"29af95f8-c3c6-4c8c-b080-8b2d18d25432\" # set in the previous cell\n",
"\n",
"for _ in range(10):\n",
" # check if a given call can be made\n",
" current_spend_for_user = budget_manager.get_current_cost(user=user_id)\n",
" budget_for_user = budget_manager.get_total_budget(user_id)\n",
" print(f\"User: {user_id} has spent ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
" if current_spend_for_user <= budget_for_user:\n",
" response = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": \"Hey, how's it going?\"}])\n",
" budget_manager.update_cost(completion_obj=response, user=user_id)\n",
" else:\n",
" response = \"Sorry - no budget!\"\n",
" print(f\"User: {user_id} has exceeded budget, current spend ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
" break # no more requests\n",
"\n",
" # print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View file

@ -1,166 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
],
"metadata": {
"id": "MbLbs1tbISk-"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": {
"id": "KGhNJRUCIh1j"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
],
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
],
"metadata": {
"id": "Xhv92NBaIpaw"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"metadata": {},
"execution_count": 11
}
]
}
]
}

View file

@ -1,565 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Migrating to LiteLLM Proxy from OpenAI/Azure OpenAI\n",
"\n",
"Covers:\n",
"\n",
"* /chat/completion\n",
"* /embedding\n",
"\n",
"\n",
"These are **selected examples**. LiteLLM Proxy is **OpenAI-Compatible**, it works with any project that calls OpenAI. Just change the `base_url`, `api_key` and `model`.\n",
"\n",
"For more examples, [go here](https://docs.litellm.ai/docs/proxy/user_keys)\n",
"\n",
"To pass provider-specific args, [go here](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)\n",
"\n",
"To drop unsupported params (E.g. frequency_penalty for bedrock with librechat), [go here](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)\n"
],
"metadata": {
"id": "kccfk0mHZ4Ad"
}
},
{
"cell_type": "markdown",
"source": [
"## /chat/completion\n",
"\n"
],
"metadata": {
"id": "nmSClzCPaGH6"
}
},
{
"cell_type": "markdown",
"source": [
"### OpenAI Python SDK"
],
"metadata": {
"id": "_vqcjwOVaKpO"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "x1e_Ok3KZzeP"
},
"outputs": [],
"source": [
"import openai\n",
"client = openai.OpenAI(\n",
" api_key=\"anything\",\n",
" base_url=\"http://0.0.0.0:4000\"\n",
")\n",
"\n",
"# request sent to model set on litellm proxy, `litellm --model`\n",
"response = client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"this is a test request, write a short poem\"\n",
" }\n",
" ],\n",
" extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
" \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
" \"generation_name\": \"ishaan-generation-openai-client\",\n",
" \"generation_id\": \"openai-client-gen-id22\",\n",
" \"trace_id\": \"openai-client-trace-id22\",\n",
" \"trace_user_id\": \"openai-client-user-id2\"\n",
" }\n",
" }\n",
")\n",
"\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"source": [
"## Function Calling"
],
"metadata": {
"id": "AqkyKk9Scxgj"
}
},
{
"cell_type": "code",
"source": [
"from openai import OpenAI\n",
"client = OpenAI(\n",
" api_key=\"sk-1234\", # [OPTIONAL] set if you set one on proxy, else set \"\"\n",
" base_url=\"http://0.0.0.0:4000\",\n",
")\n",
"\n",
"tools = [\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"location\"],\n",
" },\n",
" }\n",
" }\n",
"]\n",
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
"completion = client.chat.completions.create(\n",
" model=\"gpt-4o\", # use 'model_name' from config.yaml\n",
" messages=messages,\n",
" tools=tools,\n",
" tool_choice=\"auto\"\n",
")\n",
"\n",
"print(completion)\n"
],
"metadata": {
"id": "wDg10VqLczE1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Azure OpenAI Python SDK"
],
"metadata": {
"id": "YYoxLloSaNWW"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"client = openai.AzureOpenAI(\n",
" api_key=\"anything\",\n",
" base_url=\"http://0.0.0.0:4000\"\n",
")\n",
"\n",
"# request sent to model set on litellm proxy, `litellm --model`\n",
"response = client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"this is a test request, write a short poem\"\n",
" }\n",
" ],\n",
" extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
" \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
" \"generation_name\": \"ishaan-generation-openai-client\",\n",
" \"generation_id\": \"openai-client-gen-id22\",\n",
" \"trace_id\": \"openai-client-trace-id22\",\n",
" \"trace_user_id\": \"openai-client-user-id2\"\n",
" }\n",
" }\n",
")\n",
"\n",
"print(response)"
],
"metadata": {
"id": "yA1XcgowaSRy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Langchain Python"
],
"metadata": {
"id": "yl9qhDvnaTpL"
}
},
{
"cell_type": "code",
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain.schema import HumanMessage, SystemMessage\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"anything\"\n",
"\n",
"chat = ChatOpenAI(\n",
" openai_api_base=\"http://0.0.0.0:4000\",\n",
" model = \"gpt-3.5-turbo\",\n",
" temperature=0.1,\n",
" extra_body={\n",
" \"metadata\": {\n",
" \"generation_name\": \"ishaan-generation-langchain-client\",\n",
" \"generation_id\": \"langchain-client-gen-id22\",\n",
" \"trace_id\": \"langchain-client-trace-id22\",\n",
" \"trace_user_id\": \"langchain-client-user-id2\"\n",
" }\n",
" }\n",
")\n",
"\n",
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that im using to make a test request to.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"test from litellm. tell me why it's amazing in 1 sentence\"\n",
" ),\n",
"]\n",
"response = chat(messages)\n",
"\n",
"print(response)"
],
"metadata": {
"id": "5MUZgSquaW5t"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Curl"
],
"metadata": {
"id": "B9eMgnULbRaz"
}
},
{
"cell_type": "markdown",
"source": [
"\n",
"\n",
"```\n",
"curl -X POST 'http://0.0.0.0:4000/chat/completions' \\\n",
" -H 'Content-Type: application/json' \\\n",
" -d '{\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what llm are you\"\n",
" }\n",
" ],\n",
" \"metadata\": {\n",
" \"generation_name\": \"ishaan-test-generation\",\n",
" \"generation_id\": \"gen-id22\",\n",
" \"trace_id\": \"trace-id22\",\n",
" \"trace_user_id\": \"user-id2\"\n",
" }\n",
"}'\n",
"```\n",
"\n"
],
"metadata": {
"id": "VWCCk5PFcmhS"
}
},
{
"cell_type": "markdown",
"source": [
"### LlamaIndex"
],
"metadata": {
"id": "drBAm2e1b6xe"
}
},
{
"cell_type": "code",
"source": [
"import os, dotenv\n",
"\n",
"from llama_index.llms import AzureOpenAI\n",
"from llama_index.embeddings import AzureOpenAIEmbedding\n",
"from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
"\n",
"llm = AzureOpenAI(\n",
" engine=\"azure-gpt-3.5\", # model_name on litellm proxy\n",
" temperature=0.0,\n",
" azure_endpoint=\"http://0.0.0.0:4000\", # litellm proxy endpoint\n",
" api_key=\"sk-1234\", # litellm proxy API Key\n",
" api_version=\"2023-07-01-preview\",\n",
")\n",
"\n",
"embed_model = AzureOpenAIEmbedding(\n",
" deployment_name=\"azure-embedding-model\",\n",
" azure_endpoint=\"http://0.0.0.0:4000\",\n",
" api_key=\"sk-1234\",\n",
" api_version=\"2023-07-01-preview\",\n",
")\n",
"\n",
"\n",
"documents = SimpleDirectoryReader(\"llama_index_data\").load_data()\n",
"service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n",
"index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
"\n",
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author do growing up?\")\n",
"print(response)\n"
],
"metadata": {
"id": "d0bZcv8fb9mL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Langchain JS"
],
"metadata": {
"id": "xypvNdHnb-Yy"
}
},
{
"cell_type": "code",
"source": [
"import { ChatOpenAI } from \"@langchain/openai\";\n",
"\n",
"\n",
"const model = new ChatOpenAI({\n",
" modelName: \"gpt-4\",\n",
" openAIApiKey: \"sk-1234\",\n",
" modelKwargs: {\"metadata\": \"hello world\"} // 👈 PASS Additional params here\n",
"}, {\n",
" basePath: \"http://0.0.0.0:4000\",\n",
"});\n",
"\n",
"const message = await model.invoke(\"Hi there!\");\n",
"\n",
"console.log(message);\n"
],
"metadata": {
"id": "R55mK2vCcBN2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### OpenAI JS"
],
"metadata": {
"id": "nC4bLifCcCiW"
}
},
{
"cell_type": "code",
"source": [
"const { OpenAI } = require('openai');\n",
"\n",
"const openai = new OpenAI({\n",
" apiKey: \"sk-1234\", // This is the default and can be omitted\n",
" baseURL: \"http://0.0.0.0:4000\"\n",
"});\n",
"\n",
"async function main() {\n",
" const chatCompletion = await openai.chat.completions.create({\n",
" messages: [{ role: 'user', content: 'Say this is a test' }],\n",
" model: 'gpt-3.5-turbo',\n",
" }, {\"metadata\": {\n",
" \"generation_name\": \"ishaan-generation-openaijs-client\",\n",
" \"generation_id\": \"openaijs-client-gen-id22\",\n",
" \"trace_id\": \"openaijs-client-trace-id22\",\n",
" \"trace_user_id\": \"openaijs-client-user-id2\"\n",
" }});\n",
"}\n",
"\n",
"main();\n"
],
"metadata": {
"id": "MICH8kIMcFpg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Anthropic SDK"
],
"metadata": {
"id": "D1Q07pEAcGTb"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"from anthropic import Anthropic\n",
"\n",
"client = Anthropic(\n",
" base_url=\"http://localhost:4000\", # proxy endpoint\n",
" api_key=\"sk-s4xN1IiLTCytwtZFJaYQrA\", # litellm proxy virtual key\n",
")\n",
"\n",
"message = client.messages.create(\n",
" max_tokens=1024,\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello, Claude\",\n",
" }\n",
" ],\n",
" model=\"claude-3-opus-20240229\",\n",
")\n",
"print(message.content)"
],
"metadata": {
"id": "qBjFcAvgcI3t"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## /embeddings"
],
"metadata": {
"id": "dFAR4AJGcONI"
}
},
{
"cell_type": "markdown",
"source": [
"### OpenAI Python SDK"
],
"metadata": {
"id": "lgNoM281cRzR"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"from openai import OpenAI\n",
"\n",
"# set base_url to your proxy server\n",
"# set api_key to send to proxy server\n",
"client = OpenAI(api_key=\"<proxy-api-key>\", base_url=\"http://0.0.0.0:4000\")\n",
"\n",
"response = client.embeddings.create(\n",
" input=[\"hello from litellm\"],\n",
" model=\"text-embedding-ada-002\"\n",
")\n",
"\n",
"print(response)\n"
],
"metadata": {
"id": "NY3DJhPfcQhA"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Langchain Embeddings"
],
"metadata": {
"id": "hmbg-DW6cUZs"
}
},
{
"cell_type": "code",
"source": [
"from langchain.embeddings import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"sagemaker-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
"\n",
"\n",
"text = \"This is a test document.\"\n",
"\n",
"query_result = embeddings.embed_query(text)\n",
"\n",
"print(f\"SAGEMAKER EMBEDDINGS\")\n",
"print(query_result[:5])\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"bedrock-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
"\n",
"text = \"This is a test document.\"\n",
"\n",
"query_result = embeddings.embed_query(text)\n",
"\n",
"print(f\"BEDROCK EMBEDDINGS\")\n",
"print(query_result[:5])\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"bedrock-titan-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
"\n",
"text = \"This is a test document.\"\n",
"\n",
"query_result = embeddings.embed_query(text)\n",
"\n",
"print(f\"TITAN EMBEDDINGS\")\n",
"print(query_result[:5])"
],
"metadata": {
"id": "lX2S8Nl1cWVP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Curl Request"
],
"metadata": {
"id": "oqGbWBCQcYfd"
}
},
{
"cell_type": "markdown",
"source": [
"\n",
"\n",
"```curl\n",
"curl -X POST 'http://0.0.0.0:4000/embeddings' \\\n",
" -H 'Content-Type: application/json' \\\n",
" -d ' {\n",
" \"model\": \"text-embedding-ada-002\",\n",
" \"input\": [\"write a litellm poem\"]\n",
" }'\n",
"```\n",
"\n"
],
"metadata": {
"id": "7rkIMV9LcdwQ"
}
}
]
}

View file

@ -1,478 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"This is a tutorial on using Parallel function calling with LiteLLM"
],
"metadata": {
"id": "gHwFJ-srdnku"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RrtHuVHlZmUe"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"This tutorial walks through the steps doing parallel function calling using\n",
" - OpenAI\n",
" - Azure OpenAI"
],
"metadata": {
"id": "sG5ANaazjU0g"
}
},
{
"cell_type": "code",
"source": [
"# set openai api key\n",
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\" # litellm reads OPENAI_API_KEY from .env and sends the request"
],
"metadata": {
"id": "l4GQ-M5yZ5UW"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"\n",
"# OpenAI gpt-3.5-turbo-1106\n",
"## Step 1: send the conversation and available functions to the model"
],
"metadata": {
"id": "AxgR2fCgaRoW"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import json\n",
"# Example dummy function hard coded to return the same weather\n",
"# In production, this could be your backend API or an external API\n",
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
" \"\"\"Get the current weather in a given location\"\"\"\n",
" if \"tokyo\" in location.lower():\n",
" return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
" elif \"san francisco\" in location.lower():\n",
" return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
" elif \"paris\" in location.lower():\n",
" return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
" else:\n",
" return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
"\n",
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
"tools = [\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"location\"],\n",
" },\n",
" },\n",
" }\n",
"]\n",
"\n",
"response = litellm.completion(\n",
" model=\"gpt-3.5-turbo-1106\",\n",
" messages=messages,\n",
" tools=tools,\n",
" tool_choice=\"auto\", # auto is default, but we'll be explicit\n",
")\n",
"print(\"\\nLLM Response1:\\n\", response)\n",
"response_message = response.choices[0].message\n",
"tool_calls = response.choices[0].message.tool_calls\n",
"print(\"\\nTool Choice:\\n\", tool_calls)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Y3qteFo8ZrZP",
"outputId": "ee6c1183-55c1-4111-cdc0-967b8fed9db3"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"LLM Response1:\n",
" ModelResponse(id='chatcmpl-8MNdPbrhtnwiPK1x3PEoGwrH144TW', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')]))], created=1700344759, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage={'completion_tokens': 77, 'prompt_tokens': 88, 'total_tokens': 165}, _response_ms=1049.913)\n",
"\n",
"Tool Choice:\n",
" [ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')]\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Step 2 - Parse the Model Response and Execute Functions"
],
"metadata": {
"id": "tD4lJQ40cU44"
}
},
{
"cell_type": "code",
"source": [
"# Check if the model wants to call a function\n",
"if tool_calls:\n",
" # Execute the functions and prepare responses\n",
" available_functions = {\n",
" \"get_current_weather\": get_current_weather,\n",
" }\n",
"\n",
" messages.append(response_message) # Extend conversation with assistant's reply\n",
"\n",
" for tool_call in tool_calls:\n",
" print(f\"\\nExecuting tool call\\n{tool_call}\")\n",
" function_name = tool_call.function.name\n",
" function_to_call = available_functions[function_name]\n",
" function_args = json.loads(tool_call.function.arguments)\n",
" # calling the get_current_weather() function\n",
" function_response = function_to_call(\n",
" location=function_args.get(\"location\"),\n",
" unit=function_args.get(\"unit\"),\n",
" )\n",
" print(f\"Result from tool call\\n{function_response}\\n\")\n",
"\n",
" # Extend conversation with function response\n",
" messages.append(\n",
" {\n",
" \"tool_call_id\": tool_call.id,\n",
" \"role\": \"tool\",\n",
" \"name\": function_name,\n",
" \"content\": function_response,\n",
" }\n",
" )\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "af4oXQvicV_n",
"outputId": "abf6ac3e-4a21-4a4f-b8d7-809b763d0632"
},
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"Executing tool call\n",
"ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
"Result from tool call\n",
"{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}\n",
"\n",
"\n",
"Executing tool call\n",
"ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
"Result from tool call\n",
"{\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"}\n",
"\n",
"\n",
"Executing tool call\n",
"ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
"Result from tool call\n",
"{\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"}\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Step 3 - Second litellm.completion() call"
],
"metadata": {
"id": "E3OL1fqUdFdv"
}
},
{
"cell_type": "code",
"source": [
"second_response = litellm.completion(\n",
" model=\"gpt-3.5-turbo-1106\",\n",
" messages=messages,\n",
")\n",
"print(\"Second Response\\n\", second_response)\n",
"print(\"Second Response Message\\n\", second_response.choices[0].message.content)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8KYB2n-jc1_f",
"outputId": "6c6448ae-1c09-43ae-eb90-208b118e6179"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Second Response\n",
" ModelResponse(id='chatcmpl-8MNhat166ZqjO6egXcUh85Pd0s7KV', choices=[Choices(finish_reason='stop', index=0, message=Message(content=\"The current weather in San Francisco is 72°F, in Tokyo it's 10°C, and in Paris it's 22°C.\", role='assistant'))], created=1700345018, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage={'completion_tokens': 28, 'prompt_tokens': 465, 'total_tokens': 493}, _response_ms=999.246)\n",
"Second Response Message\n",
" The current weather in San Francisco is 72°F, in Tokyo it's 10°C, and in Paris it's 22°C.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Using Azure OpenAI"
],
"metadata": {
"id": "1cIIFEvXjofp"
}
},
{
"cell_type": "code",
"source": [
"# set Azure env variables\n",
"import os\n",
"os.environ['AZURE_API_KEY'] = \"\" # litellm reads AZURE_API_KEY from .env and sends the request\n",
"os.environ['AZURE_API_BASE'] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ['AZURE_API_VERSION'] = \"2023-07-01-preview\""
],
"metadata": {
"id": "lG9mUnModeeE"
},
"execution_count": 32,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Step 1"
],
"metadata": {
"id": "17S-Ysksj-E_"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import json\n",
"# Example dummy function hard coded to return the same weather\n",
"# In production, this could be your backend API or an external API\n",
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
" \"\"\"Get the current weather in a given location\"\"\"\n",
" if \"tokyo\" in location.lower():\n",
" return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
" elif \"san francisco\" in location.lower():\n",
" return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
" elif \"paris\" in location.lower():\n",
" return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
" else:\n",
" return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
"\n",
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
"tools = [\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"location\"],\n",
" },\n",
" },\n",
" }\n",
"]\n",
"\n",
"response = litellm.completion(\n",
" model=\"azure/chatgpt-functioncalling\", # model = azure/<your-azure-deployment-name>\n",
" messages=messages,\n",
" tools=tools,\n",
" tool_choice=\"auto\", # auto is default, but we'll be explicit\n",
")\n",
"print(\"\\nLLM Response1:\\n\", response)\n",
"response_message = response.choices[0].message\n",
"tool_calls = response.choices[0].message.tool_calls\n",
"print(\"\\nTool Choice:\\n\", tool_calls)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "boAIHLEXj80m",
"outputId": "00afcf09-5b6b-4805-c374-ba089cc6eb43"
},
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"LLM Response1:\n",
" ModelResponse(id='chatcmpl-8MOBPvEnqG7qitkmVqZmCrzSGEmDj', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')]))], created=1700346867, model='gpt-35-turbo', object='chat.completion', system_fingerprint=None, usage={'completion_tokens': 19, 'prompt_tokens': 88, 'total_tokens': 107}, _response_ms=833.4319999999999)\n",
"\n",
"Tool Choice:\n",
" [ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')]\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Step 2"
],
"metadata": {
"id": "hqh1y1IMkmGO"
}
},
{
"cell_type": "code",
"source": [
"# Check if the model wants to call a function\n",
"if tool_calls:\n",
" # Execute the functions and prepare responses\n",
" available_functions = {\n",
" \"get_current_weather\": get_current_weather,\n",
" }\n",
"\n",
" messages.append(response_message) # Extend conversation with assistant's reply\n",
"\n",
" for tool_call in tool_calls:\n",
" print(f\"\\nExecuting tool call\\n{tool_call}\")\n",
" function_name = tool_call.function.name\n",
" function_to_call = available_functions[function_name]\n",
" function_args = json.loads(tool_call.function.arguments)\n",
" # calling the get_current_weather() function\n",
" function_response = function_to_call(\n",
" location=function_args.get(\"location\"),\n",
" unit=function_args.get(\"unit\"),\n",
" )\n",
" print(f\"Result from tool call\\n{function_response}\\n\")\n",
"\n",
" # Extend conversation with function response\n",
" messages.append(\n",
" {\n",
" \"tool_call_id\": tool_call.id,\n",
" \"role\": \"tool\",\n",
" \"name\": function_name,\n",
" \"content\": function_response,\n",
" }\n",
" )\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FGu7DY7PkOiG",
"outputId": "96d39ae7-7fc8-4dd8-c82f-5ee9a486724c"
},
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"Executing tool call\n",
"ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')\n",
"Result from tool call\n",
"{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Step 3"
],
"metadata": {
"id": "4MjYyeajkpBl"
}
},
{
"cell_type": "code",
"source": [
"second_response = litellm.completion(\n",
" model=\"azure/chatgpt-functioncalling\",\n",
" messages=messages,\n",
")\n",
"print(\"Second Response\\n\", second_response)\n",
"print(\"Second Response Message\\n\", second_response.choices[0].message.content)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qHgXyZq1kqGn",
"outputId": "61a30470-d7f5-484d-c42b-681c9b60b34a"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Second Response\n",
" ModelResponse(id='chatcmpl-8MOC90vwZ2LHX0DE796XYtsOxdGcc', choices=[Choices(finish_reason='stop', index=0, message=Message(content='The current weather in San Francisco is 72°F.', role='assistant'))], created=1700346913, model='gpt-35-turbo', object='chat.completion', system_fingerprint=None, usage={'completion_tokens': 11, 'prompt_tokens': 69, 'total_tokens': 80}, _response_ms=824.882)\n",
"Second Response Message\n",
" The current weather in San Francisco is 72°F.\n"
]
}
]
}
]
}

View file

@ -1,204 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx, json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because it is too large Load diff

View file

@ -1,159 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
],
"metadata": {
"id": "eKXncoQbU_2j"
}
},
{
"cell_type": "markdown",
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
],
"metadata": {
"id": "ZciYaLwvuFbu"
}
},
{
"cell_type": "code",
"source": [
"pip install nemoguardrails langchain"
],
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
],
"metadata": {
"id": "vz5n00qyuKjp"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
],
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
],
"metadata": {
"id": "8A1KWKnzuxAS"
}
},
{
"cell_type": "code",
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
],
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"execution_count": null,
"outputs": []
}
]
}

View file

@ -1,404 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm",
"gpuType": "V100"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Set up Environment"
],
"metadata": {
"id": "vDOm5wfjdFLP"
}
},
{
"cell_type": "code",
"source": [
"!pip install --upgrade litellm"
],
"metadata": {
"id": "Bx6mAA6MHiy_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zIYv7JTyxSxR",
"outputId": "53890320-f9fa-4bf4-8362-0f17f52c6ed4"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Successfully installed fastapi-0.103.1 h11-0.14.0 huggingface-hub-0.16.4 ninja-1.11.1 pydantic-1.10.12 ray-2.6.3 safetensors-0.3.3 sentencepiece-0.1.99 starlette-0.27.0 tokenizers-0.13.3 transformers-4.33.1 uvicorn-0.23.2 vllm-0.1.4 xformers-0.0.21\n"
]
}
],
"source": [
"!pip install vllm"
]
},
{
"cell_type": "markdown",
"source": [
"# Load the Logs"
],
"metadata": {
"id": "RMcoAni6WKEx"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd"
],
"metadata": {
"id": "zchxB8c7WJe5"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# path of the csv file\n",
"file_path = 'Model-prompts-example.csv'\n",
"\n",
"# load the csv file as a pandas DataFrame\n",
"data = pd.read_csv(file_path)\n",
"\n",
"data.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81
},
"id": "aKcWr015WNPm",
"outputId": "6e226773-333f-46a2-9fc8-4f54f309d204"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Success Timestamp Input \\\n",
"0 True 1694041195 This is the templated query input \n",
"\n",
" Output RunId (Wandb Runid) \\\n",
"0 This is the query output from the model 8hlumwuk \n",
"\n",
" Model ID (or Name) \n",
"0 OpenAI/Turbo-3.5 "
],
"text/html": [
"\n",
" <div id=\"df-cd06d09e-fb43-41b0-938f-37f9d285ae66\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Success</th>\n",
" <th>Timestamp</th>\n",
" <th>Input</th>\n",
" <th>Output</th>\n",
" <th>RunId (Wandb Runid)</th>\n",
" <th>Model ID (or Name)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>True</td>\n",
" <td>1694041195</td>\n",
" <td>This is the templated query input</td>\n",
" <td>This is the query output from the model</td>\n",
" <td>8hlumwuk</td>\n",
" <td>OpenAI/Turbo-3.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cd06d09e-fb43-41b0-938f-37f9d285ae66')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
" </div>\n",
" </div>\n"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"input_texts = data['Input'].values"
],
"metadata": {
"id": "0DbL-kirWUyn"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"messages = [[{\"role\": \"user\", \"content\": input_text}] for input_text in input_texts]"
],
"metadata": {
"id": "cqpAvy8hWXyC"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Running Inference"
],
"metadata": {
"id": "SugCyom0Xy8U"
}
},
{
"cell_type": "code",
"source": [
"from litellm import batch_completion\n",
"model_name = \"facebook/opt-125m\"\n",
"provider = \"vllm\"\n",
"response_list = batch_completion(\n",
" model=model_name,\n",
" custom_llm_provider=provider, # can easily switch to huggingface, replicate, together ai, sagemaker, etc.\n",
" messages=messages,\n",
" temperature=0.2,\n",
" max_tokens=80,\n",
" )"
],
"metadata": {
"id": "qpikx3uxHns3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"response_list"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QDPikHtwKJJ2",
"outputId": "06f47c44-e258-452a-f9db-232a5b6d2810"
},
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[<ModelResponse at 0x7e5b87616750> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \".\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694053363.6139505,\n",
" \"model\": \"facebook/opt-125m\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 9,\n",
" \"completion_tokens\": 80,\n",
" \"total_tokens\": 89\n",
" }\n",
" }]"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"source": [
"response_values = [response['choices'][0]['message']['content'] for response in response_list]"
],
"metadata": {
"id": "SYqTcCiJbQDF"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"response_values"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wqs-Oy9FbiPo",
"outputId": "16a6a7b7-97c8-4b5b-eff8-09ea5eb5ad06"
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is']"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"data[f\"{model_name}_output\"] = response_values"
],
"metadata": {
"id": "mElNbBehbkrz"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data.to_csv('model_responses.csv', index=False)"
],
"metadata": {
"id": "F06NXssDc45k"
},
"execution_count": 14,
"outputs": []
}
]
}

View file

@ -1,90 +0,0 @@
from litellm import completion, completion_cost
import time
import click
from tqdm import tqdm
from tabulate import tabulate
from termcolor import colored
import os
# Define the list of models to benchmark
# select any LLM listed here: https://docs.litellm.ai/docs/providers
models = ["gpt-3.5-turbo", "claude-2"]
# Enter LLM API keys
# https://docs.litellm.ai/docs/providers
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
# List of questions to benchmark (replace with your questions)
questions = ["When will BerriAI IPO?", "When will LiteLLM hit $100M ARR?"]
# Enter your system prompt here
system_prompt = """
You are LiteLLMs helpful assistant
"""
@click.command()
@click.option(
"--system-prompt",
default="You are a helpful assistant that can answer questions.",
help="System prompt for the conversation.",
)
def main(system_prompt):
for question in questions:
data = [] # Data for the current question
with tqdm(total=len(models)) as pbar:
for model in models:
colored_description = colored(
f"Running question: {question} for model: {model}", "green"
)
pbar.set_description(colored_description)
start_time = time.time()
response = completion(
model=model,
max_tokens=500,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
],
)
end = time.time()
total_time = end - start_time
cost = completion_cost(completion_response=response)
raw_response = response["choices"][0]["message"]["content"]
data.append(
{
"Model": colored(model, "light_blue"),
"Response": raw_response, # Colorize the response
"ResponseTime": colored(f"{total_time:.2f} seconds", "red"),
"Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost
}
)
pbar.update(1)
# Separate headers from the data
headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"]
colwidths = [15, 80, 15, 10]
# Create a nicely formatted table for the current question
table = tabulate(
[list(d.values()) for d in data],
headers,
tablefmt="grid",
maxcolwidths=colwidths,
)
# Print the table for the current question
colored_question = colored(question, "green")
click.echo(f"\nBenchmark Results for '{colored_question}':")
click.echo(table) # Display the formatted table
if __name__ == "__main__":
main()

View file

@ -1,34 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# litellm completion call
question = "which country has the highest population"
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": question}],
)
print(response)
# use the auto eval Factuality() evaluator
print("calling evaluator")
evaluator = Factuality()
result = evaluator(
output=response.choices[0]["message"][
"content"
], # response from litellm.completion()
expected="India", # expected output
input=question, # question passed to litellm.completion
)
print(result)

View file

@ -1,181 +0,0 @@
<h1 align="center">
LLM-Bench
</h1>
<p align="center">
<p align="center">Benchmark LLMs response, cost and response time</p>
<p>LLM vs Cost per input + output token ($)</p>
<img width="806" alt="Screenshot 2023-11-13 at 2 51 06 PM" src="https://github.com/BerriAI/litellm/assets/29436595/6d1bed71-d062-40b8-a113-28359672636a">
</p>
<a href="https://docs.google.com/spreadsheets/d/1mvPbP02OLFgc-5-Ubn1KxGuQQdbMyG1jhMSWxAldWy4/edit?usp=sharing">
Bar Graph Excel Sheet here
</a>
| Model | Provider | Cost per input + output token ($)|
| --- | --- | --- |
| openrouter/mistralai/mistral-7b-instruct | openrouter | 0.0 |
| ollama/llama2 | ollama | 0.0 |
| ollama/llama2:13b | ollama | 0.0 |
| ollama/llama2:70b | ollama | 0.0 |
| ollama/llama2-uncensored | ollama | 0.0 |
| ollama/mistral | ollama | 0.0 |
| ollama/codellama | ollama | 0.0 |
| ollama/orca-mini | ollama | 0.0 |
| ollama/vicuna | ollama | 0.0 |
| perplexity/codellama-34b-instruct | perplexity | 0.0 |
| perplexity/llama-2-13b-chat | perplexity | 0.0 |
| perplexity/llama-2-70b-chat | perplexity | 0.0 |
| perplexity/mistral-7b-instruct | perplexity | 0.0 |
| perplexity/replit-code-v1.5-3b | perplexity | 0.0 |
| text-bison | vertex_ai-text-models | 0.00000025 |
| text-bison@001 | vertex_ai-text-models | 0.00000025 |
| chat-bison | vertex_ai-chat-models | 0.00000025 |
| chat-bison@001 | vertex_ai-chat-models | 0.00000025 |
| chat-bison-32k | vertex_ai-chat-models | 0.00000025 |
| code-bison | vertex_ai-code-text-models | 0.00000025 |
| code-bison@001 | vertex_ai-code-text-models | 0.00000025 |
| code-gecko@001 | vertex_ai-chat-models | 0.00000025 |
| code-gecko@latest | vertex_ai-chat-models | 0.00000025 |
| codechat-bison | vertex_ai-code-chat-models | 0.00000025 |
| codechat-bison@001 | vertex_ai-code-chat-models | 0.00000025 |
| codechat-bison-32k | vertex_ai-code-chat-models | 0.00000025 |
| palm/chat-bison | palm | 0.00000025 |
| palm/chat-bison-001 | palm | 0.00000025 |
| palm/text-bison | palm | 0.00000025 |
| palm/text-bison-001 | palm | 0.00000025 |
| palm/text-bison-safety-off | palm | 0.00000025 |
| palm/text-bison-safety-recitation-off | palm | 0.00000025 |
| anyscale/meta-llama/Llama-2-7b-chat-hf | anyscale | 0.0000003 |
| anyscale/mistralai/Mistral-7B-Instruct-v0.1 | anyscale | 0.0000003 |
| openrouter/meta-llama/llama-2-13b-chat | openrouter | 0.0000004 |
| openrouter/nousresearch/nous-hermes-llama2-13b | openrouter | 0.0000004 |
| deepinfra/meta-llama/Llama-2-7b-chat-hf | deepinfra | 0.0000004 |
| deepinfra/mistralai/Mistral-7B-Instruct-v0.1 | deepinfra | 0.0000004 |
| anyscale/meta-llama/Llama-2-13b-chat-hf | anyscale | 0.0000005 |
| amazon.titan-text-lite-v1 | bedrock | 0.0000007 |
| deepinfra/meta-llama/Llama-2-13b-chat-hf | deepinfra | 0.0000007 |
| text-babbage-001 | text-completion-openai | 0.0000008 |
| text-ada-001 | text-completion-openai | 0.0000008 |
| babbage-002 | text-completion-openai | 0.0000008 |
| openrouter/google/palm-2-chat-bison | openrouter | 0.000001 |
| openrouter/google/palm-2-codechat-bison | openrouter | 0.000001 |
| openrouter/meta-llama/codellama-34b-instruct | openrouter | 0.000001 |
| deepinfra/codellama/CodeLlama-34b-Instruct-hf | deepinfra | 0.0000012 |
| deepinfra/meta-llama/Llama-2-70b-chat-hf | deepinfra | 0.0000016499999999999999 |
| deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1 | deepinfra | 0.0000016499999999999999 |
| anyscale/meta-llama/Llama-2-70b-chat-hf | anyscale | 0.000002 |
| anyscale/codellama/CodeLlama-34b-Instruct-hf | anyscale | 0.000002 |
| gpt-3.5-turbo-1106 | openai | 0.000003 |
| openrouter/meta-llama/llama-2-70b-chat | openrouter | 0.000003 |
| amazon.titan-text-express-v1 | bedrock | 0.000003 |
| gpt-3.5-turbo | openai | 0.0000035 |
| gpt-3.5-turbo-0301 | openai | 0.0000035 |
| gpt-3.5-turbo-0613 | openai | 0.0000035 |
| gpt-3.5-turbo-instruct | text-completion-openai | 0.0000035 |
| openrouter/openai/gpt-3.5-turbo | openrouter | 0.0000035 |
| cohere.command-text-v14 | bedrock | 0.0000035 |
| gpt-3.5-turbo-0613 | openai | 0.0000035 |
| claude-instant-1 | anthropic | 0.00000714 |
| claude-instant-1.2 | anthropic | 0.00000714 |
| openrouter/anthropic/claude-instant-v1 | openrouter | 0.00000714 |
| anthropic.claude-instant-v1 | bedrock | 0.00000714 |
| openrouter/mancer/weaver | openrouter | 0.00001125 |
| j2-mid | ai21 | 0.00002 |
| ai21.j2-mid-v1 | bedrock | 0.000025 |
| openrouter/jondurbin/airoboros-l2-70b-2.1 | openrouter | 0.00002775 |
| command-nightly | cohere | 0.00003 |
| command | cohere | 0.00003 |
| command-light | cohere | 0.00003 |
| command-medium-beta | cohere | 0.00003 |
| command-xlarge-beta | cohere | 0.00003 |
| command-r-plus| cohere | 0.000018 |
| j2-ultra | ai21 | 0.00003 |
| ai21.j2-ultra-v1 | bedrock | 0.0000376 |
| gpt-4-1106-preview | openai | 0.00004 |
| gpt-4-vision-preview | openai | 0.00004 |
| claude-2 | anthropic | 0.0000437 |
| openrouter/anthropic/claude-2 | openrouter | 0.0000437 |
| anthropic.claude-v1 | bedrock | 0.0000437 |
| anthropic.claude-v2 | bedrock | 0.0000437 |
| gpt-4 | openai | 0.00009 |
| gpt-4-0314 | openai | 0.00009 |
| gpt-4-0613 | openai | 0.00009 |
| openrouter/openai/gpt-4 | openrouter | 0.00009 |
| gpt-4-32k | openai | 0.00018 |
| gpt-4-32k-0314 | openai | 0.00018 |
| gpt-4-32k-0613 | openai | 0.00018 |
## Setup:
```
git clone https://github.com/BerriAI/litellm
```
cd to `benchmark` dir
```
cd litellm/cookbook/benchmark
```
### Install Dependencies
```
pip install litellm click tqdm tabulate termcolor
```
### Configuration
In `benchmark/benchmark.py` select your LLMs, LLM API Key and questions
Supported LLMs: https://docs.litellm.ai/docs/providers
```python
# Define the list of models to benchmark
models = ['gpt-3.5-turbo', 'togethercomputer/llama-2-70b-chat', 'claude-2']
# Enter LLM API keys
os.environ['OPENAI_API_KEY'] = ""
os.environ['ANTHROPIC_API_KEY'] = ""
os.environ['TOGETHERAI_API_KEY'] = ""
# List of questions to benchmark (replace with your questions)
questions = [
"When will BerriAI IPO?",
"When will LiteLLM hit $100M ARR?"
]
```
## Run LLM-Bench
```
python3 benchmark.py
```
## Expected Output
```
Running question: When will BerriAI IPO? for model: claude-2: 100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:13<00:00, 4.41s/it]
Benchmark Results for 'When will BerriAI IPO?':
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
| Model | Response | Response Time (seconds) | Cost ($) |
+=================+==================================================================================+===========================+============+
| gpt-3.5-turbo | As an AI language model, I cannot provide up-to-date information or predict | 1.55 seconds | $0.000122 |
| | future events. It is best to consult a reliable financial source or contact | | |
| | BerriAI directly for information regarding their IPO plans. | | |
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
| togethercompute | I'm not able to provide information about future IPO plans or dates for BerriAI | 8.52 seconds | $0.000531 |
| r/llama-2-70b-c | or any other company. IPO (Initial Public Offering) plans and timelines are | | |
| hat | typically kept private by companies until they are ready to make a public | | |
| | announcement. It's important to note that IPO plans can change and are subject | | |
| | to various factors, such as market conditions, financial performance, and | | |
| | regulatory approvals. Therefore, it's difficult to predict with certainty when | | |
| | BerriAI or any other company will go public. If you're interested in staying | | |
| | up-to-date with BerriAI's latest news and developments, you may want to follow | | |
| | their official social media accounts, subscribe to their newsletter, or visit | | |
| | their website periodically for updates. | | |
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
| claude-2 | I do not have any information about when or if BerriAI will have an initial | 3.17 seconds | $0.002084 |
| | public offering (IPO). As an AI assistant created by Anthropic to be helpful, | | |
| | harmless, and honest, I do not have insider knowledge about Anthropic's business | | |
| | plans or strategies. | | |
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
```
## Support
**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.

View file

@ -1,154 +0,0 @@
# CodeLlama Server: Streaming, Caching, Model Fallbacks (OpenAI + Anthropic), Prompt-tracking
Works with: Anthropic, Huggingface, Cohere, TogetherAI, Azure, OpenAI, etc.
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/HuDPw-?referralCode=jch2ME)
**LIVE DEMO** - https://litellm.ai/playground
## What does CodeLlama Server do
- Uses Together AI's CodeLlama to answer coding questions, with GPT-4 + Claude-2 as backups (you can easily switch this to any model from Huggingface, Replicate, Cohere, AI21, Azure, OpenAI, etc.)
- Sets default system prompt for guardrails `system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."`
- Integrates with Promptlayer for model + prompt tracking
- Example output
<img src="imgs/code-output.png" alt="Code Output" width="600"/>
- **Consistent Input/Output** Format
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- Stream responses will always be available at `['choices'][0]['delta']['content']`
- **Error Handling** Using Model Fallbacks (if `CodeLlama` fails, try `GPT-4`) with cooldowns, and retries
- **Prompt Logging** - Log successful completions to promptlayer for testing + iterating on your prompts in production! (Learn more: https://litellm.readthedocs.io/en/latest/advanced/
**Example: Logs sent to PromptLayer**
<img src="imgs/promptlayer_logging.png" alt="Prompt Logging" width="900"/>
- **Token Usage & Spend** - Track Input + Completion tokens used + Spend/model - https://docs.litellm.ai/docs/token_usage
- **Caching** - Provides in-memory cache + GPT-Cache integration for more advanced usage - https://docs.litellm.ai/docs/caching/gpt_cache
- **Streaming & Async Support** - Return generators to stream text responses - TEST IT 👉 https://litellm.ai/
## API Endpoints
### `/chat/completions` (POST)
This endpoint is used to generate chat completions for 50+ support LLM API Models. Use llama2, GPT-4, Claude2 etc
#### Input
This API endpoint accepts all inputs in raw JSON and expects the following inputs
- `prompt` (string, required): The user's coding related question
- Additional Optional parameters: `temperature`, `functions`, `function_call`, `top_p`, `n`, `stream`. See the full list of supported inputs here: https://litellm.readthedocs.io/en/latest/input/
#### Example JSON body
For claude-2
```json
{
"prompt": "write me a function to print hello world"
}
```
### Making an API request to the Code-Gen Server
```python
import requests
import json
url = "localhost:4000/chat/completions"
payload = json.dumps({
"prompt": "write me a function to print hello world"
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```
### Output [Response Format]
Responses from the server are given in the following format.
All responses from the server are returned in the following format (for all LLM models). More info on output here: https://litellm.readthedocs.io/en/latest/output/
```json
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": ".\n\n```\ndef print_hello_world():\n print(\"hello world\")\n",
"role": "assistant"
}
}
],
"created": 1693279694.6474009,
"model": "togethercomputer/CodeLlama-34b-Instruct",
"usage": {
"completion_tokens": 14,
"prompt_tokens": 28,
"total_tokens": 42
}
}
```
## Installation & Usage
### Running Locally
1. Clone liteLLM repository to your local machine:
```
git clone https://github.com/BerriAI/litellm-CodeLlama-server
```
2. Install the required dependencies using pip
```
pip install requirements.txt
```
3. Set your LLM API keys
```
os.environ['OPENAI_API_KEY]` = "YOUR_API_KEY"
or
set OPENAI_API_KEY in your .env file
```
4. Run the server:
```
python main.py
```
## Deploying
1. Quick Start: Deploy on Railway
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/HuDPw-?referralCode=jch2ME)
2. `GCP`, `AWS`, `Azure`
This project includes a `Dockerfile` allowing you to build and deploy a Docker Project on your providers
# Support / Talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 293 KiB

View file

@ -1,101 +0,0 @@
import traceback
from flask import Flask, request, jsonify, abort, Response
from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
import json
dotenv.load_dotenv()
# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = "" # set your openai key here
# os.environ["ANTHROPIC_API_KEY"] = "" # set your anthropic key here
# os.environ["TOGETHER_AI_API_KEY"] = "" # set your together ai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
######### ENVIRONMENT VARIABLES ##########
verbose = True
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/
# set callbacks
litellm.success_callback = ["promptlayer"]
############ HELPER FUNCTIONS ###################################
def print_verbose(print_statement):
if verbose:
print(print_statement)
app = Flask(__name__)
CORS(app)
@app.route("/")
def index():
return "received!", 200
def data_generator(response):
for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"
@app.route("/chat/completions", methods=["POST"])
def api_completion():
data = request.json
start_time = time.time()
if data.get("stream") == "True":
data["stream"] = True # convert to boolean
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": data.pop("prompt")},
]
data["messages"] = messages
print(f"data: {data}")
response = completion(**data)
## LOG SUCCESS
end_time = time.time()
if (
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE
end_time = time.time()
traceback_exception = traceback.format_exc()
return handle_error(data=data)
return response
@app.route("/get_models", methods=["POST"])
def get_models():
try:
return litellm.model_list
except Exception as e:
traceback.print_exc()
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=4000, threads=500)

View file

@ -1,90 +0,0 @@
import requests
from urllib.parse import urlparse, parse_qs
def get_next_url(response):
"""
Function to get 'next' url from Link header
:param response: response from requests
:return: next url or None
"""
if "link" not in response.headers:
return None
headers = response.headers
next_url = headers["Link"]
print(next_url)
start_index = next_url.find("<")
end_index = next_url.find(">")
return next_url[1:end_index]
def get_models(url):
"""
Function to retrieve all models from paginated endpoint
:param url: base url to make GET request
:return: list of all models
"""
models = []
while url:
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to retrieve data. Status code: {response.status_code}")
return models
payload = response.json()
url = get_next_url(response)
models.extend(payload)
return models
def get_cleaned_models(models):
"""
Function to clean retrieved models
:param models: list of retrieved models
:return: list of cleaned models
"""
cleaned_models = []
for model in models:
cleaned_models.append(model["id"])
return cleaned_models
# Get text-generation models
url = "https://huggingface.co/api/models?filter=text-generation-inference"
text_generation_models = get_models(url)
cleaned_text_generation_models = get_cleaned_models(text_generation_models)
print(cleaned_text_generation_models)
# Get conversational models
url = "https://huggingface.co/api/models?filter=conversational"
conversational_models = get_models(url)
cleaned_conversational_models = get_cleaned_models(conversational_models)
print(cleaned_conversational_models)
def write_to_txt(cleaned_models, filename):
"""
Function to write the contents of a list to a text file
:param cleaned_models: list of cleaned models
:param filename: name of the text file
"""
with open(filename, "w") as f:
for item in cleaned_models:
f.write("%s\n" % item)
# Write contents of cleaned_text_generation_models to text_generation_models.txt
write_to_txt(
cleaned_text_generation_models,
"huggingface_llms_metadata/hf_text_generation_models.txt",
)
# Write contents of cleaned_conversational_models to conversational_models.txt
write_to_txt(
cleaned_conversational_models,
"huggingface_llms_metadata/hf_conversational_models.txt",
)

View file

@ -1,93 +0,0 @@
{
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002
},
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004
},
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000608,
"output_cost_per_token": 0.00000608
},
"together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001
},
"together-ai-3.1b-7b": {
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002
},
"together-ai-7.1b-20b": {
"max_tokens": 1000,
"input_cost_per_token": 0.0000004,
"output_cost_per_token": 0.0000004
},
"together-ai-20.1b-40b": {
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001
},
"together-ai-40.1b-70b": {
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000003
}
}

View file

@ -1,251 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM A121 Tutorial\n",
"\n",
"This walks through using A121 Jurassic models\n",
"* j2-light\n",
"* j2-mid\n",
"* j2-ultra"
],
"metadata": {
"id": "LeFYo8iqcn5g"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GslPQFmaZsp-"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"from litellm import completion\n",
"import os"
],
"metadata": {
"id": "P3cKiqURZx7P"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Set A121 Keys\n",
"You can get a free key from https://studio.ai21.com/account/api-key"
],
"metadata": {
"id": "tmTvA1_GaNU4"
}
},
{
"cell_type": "code",
"source": [
"os.environ[\"AI21_API_KEY\"] = \"\""
],
"metadata": {
"id": "_xX8LmxAZ2vp"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# A121 Supported Models:\n",
"https://studio.ai21.com/foundation-models"
],
"metadata": {
"id": "Fx5ZfJTLbF0A"
}
},
{
"cell_type": "markdown",
"source": [
"## J2-light Call"
],
"metadata": {
"id": "H0tl-0Z3bDaL"
}
},
{
"cell_type": "code",
"source": [
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
"response = completion(model=\"j2-light\", messages=messages)\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DZnApsJUZ_I2",
"outputId": "b5707cbe-f67c-47f7-bac5-a7b8af1ba815"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<ModelResponse at 0x7b2c2902e610> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" However, I have an important question to ask you\\nMy name is X, and I was wondering if you would be willing to help me.\",\n",
" \"role\": \"assistant\"\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1692761063.5189915,\n",
" \"model\": \"j2-light\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": null,\n",
" \"completion_tokens\": null,\n",
" \"total_tokens\": null\n",
" }\n",
"}"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"source": [
"# J2-Mid"
],
"metadata": {
"id": "wCcnrYnnbMQA"
}
},
{
"cell_type": "code",
"source": [
"messages = [{ \"content\": \"what model are you\",\"role\": \"user\"}]\n",
"response = completion(model=\"j2-mid\", messages=messages)\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-5Sxf4blaeEl",
"outputId": "6264a5e8-16d6-44a3-e167-9e0c59b6dbc4"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<ModelResponse at 0x7b2c2902f6a0> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\nplease choose the model from the list below\\nModel view in Tekla Structures\",\n",
" \"role\": \"assistant\"\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1692761140.0017524,\n",
" \"model\": \"j2-mid\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": null,\n",
" \"completion_tokens\": null,\n",
" \"total_tokens\": null\n",
" }\n",
"}"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"source": [
"# J2-Ultra"
],
"metadata": {
"id": "wDARpjxtbUcg"
}
},
{
"cell_type": "code",
"source": [
"messages = [{ \"content\": \"what model are you\",\"role\": \"user\"}]\n",
"response = completion(model=\"j2-ultra\", messages=messages)\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i228xwsYbSYo",
"outputId": "3765ac56-5a9b-442e-b357-2e346d02e1df"
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<ModelResponse at 0x7b2c28fd4090> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\nI am not a specific model, but I can provide information and assistance based on my training data. Please let me know if there is anything you\",\n",
" \"role\": \"assistant\"\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1692761157.8675153,\n",
" \"model\": \"j2-ultra\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": null,\n",
" \"completion_tokens\": null,\n",
" \"total_tokens\": null\n",
" }\n",
"}"
]
},
"metadata": {},
"execution_count": 8
}
]
}
]
}

View file

@ -1,238 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
]
}
]
}

View file

@ -1,411 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "MZ01up0p7wOJ"
},
"source": [
"## 🚅 liteLLM Quick Start Demo\n",
"### TLDR: Call 50+ LLM APIs using chatGPT Input/Output format\n",
"https://github.com/BerriAI/litellm\n",
"\n",
"liteLLM is package to simplify calling **OpenAI, Azure, Llama2, Cohere, Anthropic, Huggingface API Endpoints**. LiteLLM manages\n",
"\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "RZtzCnQS7rW-"
},
"source": [
"## Installation and setting Params"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rsrN5W-N7L8d"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "ArrWyG5b7QAG"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"import os"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "bbhJRt34_NJ1"
},
"source": [
"## Set your API keys\n",
"- liteLLM reads your .env, env variables or key manager for Auth\n",
"\n",
"Set keys for the models you want to use below"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "-h8Ga5cR7SvV"
},
"outputs": [],
"source": [
"# Only set keys for the LLMs you want to use\n",
"os.environ['OPENAI_API_KEY'] = \"\" #@param\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = \"\" #@param\n",
"os.environ[\"REPLICATE_API_KEY\"] = \"\" #@param\n",
"os.environ[\"COHERE_API_KEY\"] = \"\" #@param\n",
"os.environ[\"AZURE_API_BASE\"] = \"\" #@param\n",
"os.environ[\"AZURE_API_VERSION\"] = \"\" #@param\n",
"os.environ[\"AZURE_API_KEY\"] = \"\" #@param"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "fhqpKv6L8fBj"
},
"source": [
"## Call chatGPT"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "speIkoX_8db4",
"outputId": "331a6c65-f121-4e65-e121-bf8aaad05d9d"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject chat.completion id=chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr at 0x12ff93630> JSON: {\n",
" \"id\": \"chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1695490221,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm sorry, but as an AI text-based model, I don't have real-time information. However, you can check the current weather in San Francisco by searching for \\\"weather in SF\\\" on any search engine or checking a weather website or app.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 51,\n",
" \"total_tokens\": 64\n",
" },\n",
" \"response_ms\": 2385.592\n",
"}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"completion(model=\"gpt-3.5-turbo\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "Q3jV1Uxv8zNo"
},
"source": [
"## Call Claude-2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "V8yTWYzY8m9S",
"outputId": "8b6dd32d-f9bf-4e89-886d-47cb8020f025"
},
"outputs": [
{
"data": {
"text/plain": [
"<ModelResponse chat.completion id=chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295 at 0x12ff85a40> JSON: {\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop_sequence\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Unfortunately I don't have enough context to know the exact location you are asking about when you say \\\"SF\\\". SF could refer to San Francisco, California, or potentially other cities that go by SF as an abbreviation. To get an accurate weather report, it would be helpful if you could provide the full city name and state/country. If you are looking for the weather in San Francisco, California, I would be happy to provide that forecast. Please let me know the specific location you want the weather for.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295\",\n",
" \"created\": 1695490260.983768,\n",
" \"response_ms\": 6351.544,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 14,\n",
" \"completion_tokens\": 102,\n",
" \"total_tokens\": 116\n",
" }\n",
"}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"completion(model=\"claude-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "yu0LPDmW9PJa"
},
"source": [
"## Call llama2 on replicate"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0GWV5mtO9Jbu",
"outputId": "38538825-b271-406d-a437-f5cf0eb7e548"
},
"outputs": [
{
"data": {
"text/plain": [
"<ModelResponse chat.completion id=chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0 at 0x138b87e50> JSON: {\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm happy to help! However, I must point out that the question \\\"what's the weather in SF\\\" doesn't make sense as \\\"SF\\\" could refer to multiple locations. Could you please clarify which location you are referring to? San Francisco, California or Sioux Falls, South Dakota? Once I have more context, I would be happy to provide you with accurate and reliable information.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0\",\n",
" \"created\": 1695490237.714101,\n",
" \"response_ms\": 12109.565,\n",
" \"model\": \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 78,\n",
" \"total_tokens\": 84\n",
" },\n",
" \"ended\": 1695490249.821266\n",
"}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n",
"completion(model=model, messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "HXdj5SEe9iLK"
},
"source": [
"## Call Command-Nightly"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "EaUq2xIx9fhr",
"outputId": "55fe6f52-b58b-4729-948a-74dac4b431b2"
},
"outputs": [
{
"data": {
"text/plain": [
"<ModelResponse chat.completion id=chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b at 0x1389725e0> JSON: {\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" As an AI model I don't have access to real-time data, so I can't tell\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b\",\n",
" \"created\": 1695490235.936903,\n",
" \"response_ms\": 1022.6759999999999,\n",
" \"model\": \"command-nightly\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 19,\n",
" \"total_tokens\": 25\n",
" }\n",
"}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"completion(model=\"command-nightly\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "1g9hSgsL9soJ"
},
"source": [
"## Call Azure OpenAI"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"For azure openai calls ensure to add the `azure/` prefix to `model`. If your deployment-id is `chatgpt-test` set `model` = `azure/chatgpt-test`"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "AvLjR-PF-lt0",
"outputId": "deff2db3-b003-48cd-ea62-c03a68a4464a"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject chat.completion id=chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO at 0x138b84ae0> JSON: {\n",
" \"id\": \"chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1695490231,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Sorry, as an AI language model, I don't have real-time information. Please check your preferred weather website or app for the latest weather updates of San Francisco.\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 33,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 47\n",
" },\n",
" \"response_ms\": 1499.529\n",
"}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"completion(model=\"azure/chatgpt-v-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View file

@ -1,201 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
]
}
]
}

View file

@ -1,289 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install litellm # version 0.1.724 or higher "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call Ollama - llama2 with Streaming"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<generator object get_ollama_response_stream at 0x109096c10>\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': \"'\"}\n",
"{'role': 'assistant', 'content': 'm'}\n",
"{'role': 'assistant', 'content': ' L'}\n",
"{'role': 'assistant', 'content': 'La'}\n",
"{'role': 'assistant', 'content': 'MA'}\n",
"{'role': 'assistant', 'content': ','}\n",
"{'role': 'assistant', 'content': ' an'}\n",
"{'role': 'assistant', 'content': ' A'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': ' assistant'}\n",
"{'role': 'assistant', 'content': ' developed'}\n",
"{'role': 'assistant', 'content': ' by'}\n",
"{'role': 'assistant', 'content': ' Meta'}\n",
"{'role': 'assistant', 'content': ' A'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': ' that'}\n",
"{'role': 'assistant', 'content': ' can'}\n",
"{'role': 'assistant', 'content': ' understand'}\n",
"{'role': 'assistant', 'content': ' and'}\n",
"{'role': 'assistant', 'content': ' respond'}\n",
"{'role': 'assistant', 'content': ' to'}\n",
"{'role': 'assistant', 'content': ' human'}\n",
"{'role': 'assistant', 'content': ' input'}\n",
"{'role': 'assistant', 'content': ' in'}\n",
"{'role': 'assistant', 'content': ' a'}\n",
"{'role': 'assistant', 'content': ' convers'}\n",
"{'role': 'assistant', 'content': 'ational'}\n",
"{'role': 'assistant', 'content': ' manner'}\n",
"{'role': 'assistant', 'content': '.'}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\",\n",
" stream=True\n",
")\n",
"print(response)\n",
"for chunk in response:\n",
" print(chunk['choices'][0]['delta'])\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call Ollama - Llama2 with Acompletion + Streaming"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: async_generator in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (1.10)\n"
]
}
],
"source": [
"# litellm uses async_generator for ollama async streaming, ensure it's installed\n",
"!pip install async_generator"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'm'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' just'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' an'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' A'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' don'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 't'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' access'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' real'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'time'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' or'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' current'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' conditions'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' живело'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' can'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' forec'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'asts'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' would'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' like'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Please'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' let'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' me'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' know'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' located'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' do'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' my'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' best'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' assist'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"None\n"
]
}
],
"source": [
"import litellm\n",
"\n",
"async def async_ollama():\n",
" response = await litellm.acompletion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"what's the weather\" ,\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\", \n",
" stream=True\n",
" )\n",
" async for chunk in response:\n",
" print(chunk)\n",
"\n",
"result = await async_ollama()\n",
"print(result)\n",
"\n",
"try:\n",
" async for chunk in result:\n",
" print(chunk)\n",
"except TypeError: # the last chunk is None from Ollama, this raises an error with async streaming\n",
" pass"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Completion Call"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-ea7b8242-791f-4656-ba12-e098edeb960e\",\n",
" \"created\": 1695324686.6696231,\n",
" \"response_ms\": 4072.3050000000003,\n",
" \"model\": \"ollama/llama2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 10,\n",
" \"completion_tokens\": 27,\n",
" \"total_tokens\": 37\n",
" }\n",
"}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\"\n",
")\n",
"print(response)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -1,238 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "YV6L5fNv7Kep"
},
"source": [
"# Call Replicate LLMs using chatGPT Input/Output Format\n",
"This tutorial covers using the following Replicate Models with liteLLM\n",
"\n",
"- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
"- [LLAMA-2 70B Chat](https://replicate.com/replicate/llama-2-70b-chat)\n",
"- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
"- [Dolly V2 12B](https://replicate.com/replicate/dolly-v2-12b)\n",
"- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TO-EdF84O9QT"
},
"outputs": [],
"source": [
"# install liteLLM\n",
"!pip install litellm"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "mpHTbTqQ8fey"
},
"source": [
"Imports & Set ENV variables\n",
"Get your Replicate Key: https://replicate.com/account/api-tokens"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "kDbgfcU8O-dW"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"import os\n",
"os.environ['REPLICATE_API_TOKEN'] = ' ' # @param\n",
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "1KmkOdzLSOmJ"
},
"source": [
"## Call Replicate Models using completion(model, messages) - chatGPT format"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XJ4nh4SnRzHP",
"outputId": "986c0544-bb40-4915-f00f-498b0e518307"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"replicate is not installed. Installing...\n",
"Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb \n",
"]\n",
"\n",
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \"I'm sorry for you being unable to access this content as my training data only goes up until 2023/03. However I can tell you what your local weather forecast may look like at any time of year with respect to current conditions:\"}}], 'created': 1691611730.7224207, 'model': 'stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb', 'usage': {'prompt_tokens': 9, 'completion_tokens': 49, 'total_tokens': 58}}\n",
"Response from replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1 \n",
"]\n",
"\n",
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" Hello! I'm happy to help you with your question. However, I must point out that the question itself may not be meaningful. San Francisco is a city located in California, USA, and it is not possible for me to provide you with the current weather conditions there as I am a text-based AI language model and do not have access to real-time weather data. Additionally, the weather in San Francisco can vary greatly depending on the time of year, so it would be best to check a reliable weather source for the most up-to-date information.\\n\\nIf you meant to ask a different question, please feel free to rephrase it, and I will do my best to assist you in a safe and positive manner.\"}}], 'created': 1691611745.0269957, 'model': 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1', 'usage': {'prompt_tokens': 9, 'completion_tokens': 143, 'total_tokens': 152}}\n",
"Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc \n",
"]\n",
"\n",
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" Hello! I'm here to help you with your question. However, I must inform you that the weather in San Francisco can be quite unpredictable and can change rapidly. It's important to check reliable sources such as AccuWeather or the National Weather Service for the most up-to-date and accurate information about the weather in San Francisco.\\nI cannot provide you with real-time weather data or forecasts as I'm just an AI and do not have access to current weather conditions or predictions. But I can suggest some trustworthy websites or apps where you can find the latest weather updates:\\n* AccuWeather (accuweather.com)\\n* The Weather Channel (weather.com)\\n* Dark Sky (darksky.net)\\n* Weather Underground (wunderground.com)\\nRemember, it's always best to consult multiple sources for the most accurate information when planning your day or trip. Enjoy your day!\"}}], 'created': 1691611748.7723358, 'model': 'a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc', 'usage': {'prompt_tokens': 9, 'completion_tokens': 174, 'total_tokens': 183}}\n",
"Response from replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5 \n",
"]\n",
"\n",
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': 'Its 68 degrees right now in San Francisco! The temperature will be rising through the week and i expect it to reach 70 on Thursdays and Friday. Skies are expected to be partly cloudy with some sun breaks throughout the day.\\n\\n'}}], 'created': 1691611752.2002115, 'model': 'replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5', 'usage': {'prompt_tokens': 9, 'completion_tokens': 48, 'total_tokens': 57}}\n",
"Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b \n",
"]\n",
"\n",
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ''}}], 'created': 1691611752.8998356, 'model': 'replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b', 'usage': {'prompt_tokens': 9, 'completion_tokens': 0, 'total_tokens': 9}}\n"
]
}
],
"source": [
"llama_2 = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n",
"llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n",
"dolly_v2 = \"replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5\"\n",
"vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n",
"models = [llama_2, llama_2_7b, dolly_v2, vicuna]\n",
"for model in models:\n",
" response = completion(model=model, messages=messages)\n",
" print(f\"Response from {model} \\n]\\n\")\n",
" print(response)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zlTVLB-7PTV_",
"outputId": "5182275b-3108-46fa-a2cf-745fac4ad110"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hi\n",
" there!\n",
" The\n",
" current\n",
" forecast\n",
" for\n",
" today's\n",
" high\n",
" temperature\n",
" ranges\n",
" from\n",
" 75\n",
" degrees\n",
" Fahrenheit\n",
" all\n",
" day\n",
" to\n",
" 83\n",
" degrees\n",
" Fahrenheit\n",
" with\n",
" possible\n",
" isolated\n",
" thunderstorms\n",
" during\n",
" the\n",
" afternoon\n",
" hours,\n",
" mainly\n",
" at\n",
" sunset\n",
" through\n",
" early\n",
" evening. The\n",
" Pacific\n",
" Ocean\n",
" has\n",
" a\n",
" low\n",
" pressure\n",
" of\n",
" 926\n",
" mb\n",
" and\n",
" mostly\n",
" cloud\n",
" cover\n",
" in\n",
" this\n",
" region\n",
" on\n",
" sunny\n",
" days\n",
" due\n",
" to\n",
" warming\n",
" temperatures\n",
" above\n",
" average\n",
" along\n",
" most\n",
" coastal\n",
" areas\n",
" and\n",
" ocean\n",
" breezes.<|USER|>\n"
]
}
],
"source": [
"# @title Stream Responses from Replicate - Outputs in the same format used by chatGPT streaming\n",
"response = completion(model=llama_2, messages=messages, stream=True)\n",
"\n",
"for chunk in response:\n",
" print(chunk['choices'][0]['delta'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "t7WMRuL-8NrO"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View file

@ -1,226 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# [STREAMING] OpenAI, Anthropic, Replicate, Cohere using liteLLM\n",
"In this tutorial:\n",
"Note: All inputs/outputs are in the format used by `gpt-3.5-turbo`\n",
"\n",
"- Call all models in the same input format [**with streaming**]:\n",
"\n",
" `completion(model, messages, stream=True)`\n",
"- All streaming generators are accessed at `chunk['choices'][0]['delta']`\n",
"\n",
"The following Models are covered in this tutorial\n",
"- [GPT-3.5-Turbo](https://platform.openai.com/docs/models/gpt-3-5)\n",
"- [Claude-2](https://www.anthropic.com/index/claude-2)\n",
"- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
"- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
"- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
"- [Cohere - Command Nightly]()\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "YV6L5fNv7Kep"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TO-EdF84O9QT"
},
"outputs": [],
"source": [
"# install liteLLM\n",
"!pip install litellm==0.1.369"
]
},
{
"cell_type": "markdown",
"source": [
"## Imports & Set ENV variables\n",
"Get your API Keys\n",
"\n",
"https://platform.openai.com/account/api-keys\n",
"\n",
"https://replicate.com/account/api-tokens\n",
"\n",
"https://console.anthropic.com/account/keys\n",
"\n",
"https://dashboard.cohere.ai/api-keys\n"
],
"metadata": {
"id": "mpHTbTqQ8fey"
}
},
{
"cell_type": "code",
"source": [
"from litellm import completion\n",
"import os\n",
"\n",
"os.environ['OPENAI_API_KEY'] = '' # @param\n",
"os.environ['REPLICATE_API_TOKEN'] = '' # @param\n",
"os.environ['ANTHROPIC_API_KEY'] = '' # @param\n",
"os.environ['COHERE_API_KEY'] = '' # @param"
],
"metadata": {
"id": "kDbgfcU8O-dW"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Set Messages"
],
"metadata": {
"id": "1KmkOdzLSOmJ"
}
},
{
"cell_type": "code",
"source": [
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
],
"metadata": {
"id": "xIEeOhVH-oh6"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Models using liteLLM Streaming -\n",
"\n",
"## `completion(model, messages, stream)`"
],
"metadata": {
"id": "9SOCVRC1L-G3"
}
},
{
"cell_type": "code",
"source": [
"# replicate models #######\n",
"stability_ai = \"stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\"\n",
"llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n",
"vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n",
"\n",
"models = [\"gpt-3.5-turbo\", \"claude-2\", stability_ai, llama_2_7b, vicuna, \"command-nightly\"] # command-nightly is Cohere\n",
"for model in models:\n",
" replicate = (model == stability_ai or model==llama_2_7b or model==vicuna) # let liteLLM know if a model is replicate, using this optional param, `replicate=True`\n",
" response = completion(model=model, messages=messages, stream=True, replicate=replicate)\n",
" print(f\"####################\\n\\nResponse from {model}\")\n",
" for i, chunk in enumerate(response):\n",
" if i < 5: # NOTE: LIMITING CHUNKS FOR THIS DEMO\n",
" print((chunk['choices'][0]['delta']))\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XJ4nh4SnRzHP",
"outputId": "26b9fe10-b499-4a97-d60d-a8cb8f8030b8"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"####################\n",
"\n",
"Response from gpt-3.5-turbo\n",
"{\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\"\n",
"}\n",
"{\n",
" \"content\": \"I\"\n",
"}\n",
"{\n",
" \"content\": \"'m\"\n",
"}\n",
"{\n",
" \"content\": \" sorry\"\n",
"}\n",
"{\n",
" \"content\": \",\"\n",
"}\n",
"####################\n",
"\n",
"Response from claude-2\n",
"{'role': 'assistant', 'content': ' Unfortunately'}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': ' don'}\n",
"{'role': 'assistant', 'content': \"'t\"}\n",
"{'role': 'assistant', 'content': ' have'}\n",
"####################\n",
"\n",
"Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\n",
"{'role': 'assistant', 'content': \"I'm\"}\n",
"{'role': 'assistant', 'content': ' sorry,'}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': ' cannot'}\n",
"{'role': 'assistant', 'content': ' answer'}\n",
"####################\n",
"\n",
"Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\n",
"{'role': 'assistant', 'content': ''}\n",
"{'role': 'assistant', 'content': ' Hello'}\n",
"{'role': 'assistant', 'content': '!'}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': \"'\"}\n",
"####################\n",
"\n",
"Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\n",
"{'role': 'assistant', 'content': 'Comment:'}\n",
"{'role': 'assistant', 'content': 'Hi! '}\n",
"{'role': 'assistant', 'content': 'How '}\n",
"{'role': 'assistant', 'content': 'are '}\n",
"{'role': 'assistant', 'content': 'you '}\n",
"####################\n",
"\n",
"Response from command-nightly\n",
"{'role': 'assistant', 'content': ' Hello'}\n",
"{'role': 'assistant', 'content': '!'}\n",
"{'role': 'assistant', 'content': ' '}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': \"'m\"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "t7WMRuL-8NrO"
},
"execution_count": null,
"outputs": []
}
]
}

View file

@ -1,199 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using Google Palm (VertexAI) with liteLLM \n",
"### chat-bison, chat-bison@001, text-bison, text-bison@001"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install litellm==0.1.388"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set VertexAI Configs\n",
"Vertex AI requires the following:\n",
"* `vertex_project` - Your Project ID\n",
"* `vertex_location` - Your Vertex AI region\n",
"Both can be found on: https://console.cloud.google.com/\n",
"\n",
"VertexAI uses Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information on setting this up\n",
"\n",
"NOTE: VertexAI requires you to set `application_default_credentials.json`, this can be set by running `gcloud auth application-default login` in your terminal\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# set you Vertex AI configs\n",
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call VertexAI - chat-bison using liteLLM"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': LiteLLM LiteLLM is a large language model from Google AI that is designed to be lightweight and efficient. It is based on the Transformer architecture and has been trained on a massive dataset of text. LiteLLM is available as a pre-trained model that can be used for a variety of natural language processing tasks, such as text classification, question answering, and summarization.}}], 'created': 1692036777.831989, 'model': 'chat-bison'}\n"
]
}
],
"source": [
"user_message = \"what is liteLLM \"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"# chat-bison or chat-bison@001 supported by Vertex AI (As of Aug 2023)\n",
"response = completion(model=\"chat-bison\", messages=messages)\n",
"print(response)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call VertexAI - text-bison using liteLLM"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['text-bison', 'text-bison@001']\n"
]
}
],
"source": [
"print(litellm.vertex_text_models)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM is a low-precision variant of the large language model LLM 5. For a given text prompt, liteLLM can continue the text in a way that is both coherent and informative.}}], 'created': 1692036813.052487, 'model': 'text-bison@001'}\n"
]
}
],
"source": [
"user_message = \"what is liteLLM \"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"# text-bison or text-bison@001 supported by Vertex AI (As of Aug 2023)\n",
"response = completion(model=\"text-bison@001\", messages=messages)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM was originally developed by Google engineers as a lite version of LLM, which stands for large language model. It is a deep learning language model that is designed to be more efficient than traditional LLMs while still achieving comparable performance. liteLLM is built on Tensor2Tensor, a framework for building and training large neural networks. It is able to learn from massive amounts of text data and generate text that is both coherent and informative. liteLLM has been shown to be effective for a variety of tasks, including machine translation, text summarization, and question answering.}}], 'created': 1692036821.60951, 'model': 'text-bison'}\n"
]
}
],
"source": [
"response = completion(model=\"text-bison\", messages=messages)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"liteLLM is a lightweight language model that is designed to be fast and efficient. It is based on the Transformer architecture, but it has been modified to reduce the number of parameters and the amount of computation required. This makes it suitable for use on devices with limited resources, such as mobile phones and embedded systems.\n",
"\n",
"liteLLM is still under development, but it has already been shown to be effective on a variety of tasks, including text classification, natural language inference, and machine translation. It is also being used to develop new applications, such as chatbots and language assistants.\n",
"\n",
"If you are interested in learning more about lite\n"
]
}
],
"source": [
"response = completion(model=\"text-bison@001\", messages=messages, temperature=0.4, top_k=10, top_p=0.2)\n",
"print(response['choices'][0]['message']['content'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -1,187 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LiteLLM Clarifai \n",
"This notebook walks you through on how to use liteLLM integration of Clarifai and call LLM model from clarifai with response in openAI output format."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pre-Requisites"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#install necessary packages\n",
"!pip install litellm\n",
"!pip install clarifai"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To obtain Clarifai Personal Access Token follow the steps mentioned in the [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"## Set Clarifai Credentials\n",
"import os\n",
"os.environ[\"CLARIFAI_API_KEY\"]= \"YOUR_CLARIFAI_PAT\" # Clarifai PAT"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mistral-large"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import litellm\n",
"\n",
"litellm.set_verbose=False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mistral large response : ModelResponse(id='chatcmpl-6eed494d-7ae2-4870-b9c2-6a64d50a6151', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\"In the grand tapestry of time, where tales unfold,\\nLies the chronicle of ages, a sight to behold.\\nA tale of empires rising, and kings of old,\\nOf civilizations lost, and stories untold.\\n\\nOnce upon a yesterday, in a time so vast,\\nHumans took their first steps, casting shadows in the past.\\nFrom the cradle of mankind, a journey they embarked,\\nThrough stone and bronze and iron, their skills they sharpened and marked.\\n\\nEgyptians built pyramids, reaching for the skies,\\nWhile Greeks sought wisdom, truth, in philosophies that lie.\\nRoman legions marched, their empire to expand,\\nAnd in the East, the Silk Road joined the world, hand in hand.\\n\\nThe Middle Ages came, with knights in shining armor,\\nFeudal lords and serfs, a time of both clamor and calm order.\\nThen Renaissance bloomed, like a flower in the sun,\\nA rebirth of art and science, a new age had begun.\\n\\nAcross the vast oceans, explorers sailed with courage bold,\\nDiscovering new lands, stories of adventure, untold.\\nIndustrial Revolution churned, progress in its wake,\\nMachines and factories, a whole new world to make.\\n\\nTwo World Wars raged, a testament to man's strife,\\nYet from the ashes rose hope, a renewed will for life.\\nInto the modern era, technology took flight,\\nConnecting every corner, bathed in digital light.\\n\\nHistory, a symphony, a melody of time,\\nA testament to human will, resilience so sublime.\\nIn every page, a lesson, in every tale, a guide,\\nFor understanding our past, shapes our future's tide.\", role='assistant'))], created=1713896412, model='https://api.clarifai.com/v2/users/mistralai/apps/completion/models/mistral-large/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=13, completion_tokens=338, total_tokens=351))\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
"response=completion(\n",
" model=\"clarifai/mistralai.completion.mistral-large\",\n",
" messages=messages,\n",
" )\n",
"\n",
"print(f\"Mistral large response : {response}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Claude-2.1 "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Claude-2.1 response : ModelResponse(id='chatcmpl-d126c919-4db4-4aa3-ac8f-7edea41e0b93', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\" Here's a poem I wrote about history:\\n\\nThe Tides of Time\\n\\nThe tides of time ebb and flow,\\nCarrying stories of long ago.\\nFigures and events come into light,\\nShaping the future with all their might.\\n\\nKingdoms rise, empires fall, \\nLeaving traces that echo down every hall.\\nRevolutions bring change with a fiery glow,\\nToppling structures from long ago.\\n\\nExplorers traverse each ocean and land,\\nSeeking treasures they don't understand.\\nWhile artists and writers try to make their mark,\\nHoping their works shine bright in the dark.\\n\\nThe cycle repeats again and again,\\nAs humanity struggles to learn from its pain.\\nThough the players may change on history's stage,\\nThe themes stay the same from age to age.\\n\\nWar and peace, life and death,\\nLove and strife with every breath.\\nThe tides of time continue their dance,\\nAs we join in, by luck or by chance.\\n\\nSo we study the past to light the way forward, \\nHeeding warnings from stories told and heard.\\nThe future unfolds from this unending flow -\\nWhere the tides of time ultimately go.\", role='assistant'))], created=1713896579, model='https://api.clarifai.com/v2/users/anthropic/apps/completion/models/claude-2_1/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=12, completion_tokens=232, total_tokens=244))\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
"response=completion(\n",
" model=\"clarifai/anthropic.completion.claude-2_1\",\n",
" messages=messages,\n",
" )\n",
"\n",
"print(f\"Claude-2.1 response : {response}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### OpenAI GPT-4 (Streaming)\n",
"Though clarifai doesn't support streaming, still you can call stream and get the response in standard StreamResponse format of liteLLM"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"In the quiet corners of time's grand hall,\\nLies the tale of rise and fall.\\nFrom ancient ruins to modern sprawl,\\nHistory, the greatest story of them all.\\n\\nEmpires have risen, empires have decayed,\\nThrough the eons, memories have stayed.\\nIn the book of time, history is laid,\\nA tapestry of events, meticulously displayed.\\n\\nThe pyramids of Egypt, standing tall,\\nThe Roman Empire's mighty sprawl.\\nFrom Alexander's conquest, to the Berlin Wall,\\nHistory, a silent witness to it all.\\n\\nIn the shadow of the past we tread,\\nWhere once kings and prophets led.\\nTheir stories in our hearts are spread,\\nEchoes of their words, in our minds are read.\\n\\nBattles fought and victories won,\\nActs of courage under the sun.\\nTales of love, of deeds done,\\nIn history's grand book, they all run.\\n\\nHeroes born, legends made,\\nIn the annals of time, they'll never fade.\\nTheir triumphs and failures all displayed,\\nIn the eternal march of history's parade.\\n\\nThe ink of the past is forever dry,\\nBut its lessons, we cannot deny.\\nIn its stories, truths lie,\\nIn its wisdom, we rely.\\n\\nHistory, a mirror to our past,\\nA guide for the future vast.\\nThrough its lens, we're ever cast,\\nIn the drama of life, forever vast.\", role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n",
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
"response = completion(\n",
" model=\"clarifai/openai.chat-completion.GPT-4\",\n",
" messages=messages,\n",
" stream=True,\n",
" api_key = \"c75cc032415e45368be331fdd2c06db0\")\n",
"\n",
"for chunk in response:\n",
" print(chunk)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -1,331 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
],
"metadata": {
"id": "vnvlwUDZK7VA"
}
},
{
"cell_type": "code",
"source": [
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": {
"id": "KrINCwRfLgZV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os, litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "nK7zR5OgLlh2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": {
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
}
]
}

View file

@ -1,25 +0,0 @@
FROM ollama/ollama as ollama
RUN echo "auto installing llama2"
# auto install ollama/llama2
RUN ollama serve & sleep 2 && ollama pull llama2
RUN echo "installing litellm"
RUN apt-get update
# Install Python
RUN apt-get install -y python3 python3-pip
# Set the working directory in the container
WORKDIR /app
# Copy the current directory contents into the container at /app
COPY . /app
# Install any needed packages specified in requirements.txt
RUN python3 -m pip install litellm
COPY start.sh /start.sh
ENTRYPOINT [ "/bin/bash", "/start.sh" ]

View file

@ -1 +0,0 @@
litellm

View file

@ -1,2 +0,0 @@
ollama serve &
litellm

View file

@ -1,35 +0,0 @@
import openai
api_base = f"http://0.0.0.0:8000"
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it",
}
],
stream=True,
)
for chunk in response:
print(f"LiteLLM: streaming response from proxy {chunk}")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it",
}
],
)
print(f"LiteLLM: response from proxy {response}")

File diff suppressed because one or more lines are too long

View file

@ -1,52 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"!pip install litellm"
],
"metadata": {
"id": "j6yJsCGeaq8G"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception as e:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
]
}

View file

@ -1,594 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2039,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum(rate(litellm_self_latency_bucket{self=\"self\"}[1m])) by (le))",
"legendFormat": "Time to first token",
"range": true,
"refId": "A"
}
],
"title": "Time to first token (latency)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyUSD"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "7e4b0627fd32efdd2313c846325575808aadcf2839f0fde90723aab9ab73c78f"
},
"properties": [
{
"id": "displayName",
"value": "Translata"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "sum(increase(litellm_spend_metric_total[30d])) by (hashed_api_key)",
"legendFormat": "{{team}}",
"range": true,
"refId": "A"
}
],
"title": "Spend by team",
"transformations": [],
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 16
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "sum by (model) (increase(litellm_requests_metric_total[5m]))",
"legendFormat": "{{model}}",
"range": true,
"refId": "A"
}
],
"title": "Requests by model",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 3,
"x": 0,
"y": 25
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "9.4.17",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "sum(increase(litellm_llm_api_failed_requests_metric_total[1h]))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Faild Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyUSD"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 3,
"x": 3,
"y": 25
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "sum(increase(litellm_spend_metric_total[30d])) by (model)",
"legendFormat": "{{model}}",
"range": true,
"refId": "A"
}
],
"title": "Spend",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 6,
"y": 25
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "rMzWaBvIk"
},
"editorMode": "code",
"expr": "sum(increase(litellm_total_tokens_total[5m])) by (model)",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Tokens",
"type": "timeseries"
}
],
"refresh": "1m",
"revision": 1,
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "LLM Proxy",
"uid": "rgRrHxESz",
"version": 15,
"weekStart": ""
}

View file

@ -1,6 +0,0 @@
## This folder contains the `json` for creating the following Grafana Dashboard
### Pre-Requisites
- Setup LiteLLM Proxy Prometheus Metrics https://docs.litellm.ai/docs/proxy/prometheus
![1716623265684](https://github.com/BerriAI/litellm/assets/29436595/0e12c57e-4a2d-4850-bd4f-e4294f87a814)

View file

@ -1,807 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 20,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 3,
"panels": [],
"title": "LiteLLM Proxy Level Metrics",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "Total requests per second made to proxy - success + failure ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 1
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m]))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Proxy - Requests per second (success + failure)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "Failures per second by Exception Class",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 1
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(litellm_proxy_failed_requests_metric_total[2m])) by (exception_class)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Proxy Failure Responses / Second By Exception Class",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "Average Response latency (seconds)",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "sum(rate(litellm_request_total_latency_metric_sum[2m]))/sum(rate(litellm_request_total_latency_metric_count[2m]))"
},
"properties": [
{
"id": "displayName",
"value": "Average Latency (seconds)"
}
]
},
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(litellm_request_total_latency_metric_bucket[2m])) by (le))"
},
"properties": [
{
"id": "displayName",
"value": "Median Latency (seconds)"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(litellm_request_total_latency_metric_sum[2m]))/sum(rate(litellm_request_total_latency_metric_count[2m]))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(litellm_request_total_latency_metric_bucket[2m])) by (le))",
"hide": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "Median latency seconds"
}
],
"title": "Proxy - Average & Median Response Latency (seconds)",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 7,
"panels": [],
"title": "LLM API Metrics",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "x-ratelimit-remaining-requests returning from LLM APIs",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"editorMode": "code",
"expr": "topk(5, sort(litellm_remaining_requests))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "x-ratelimit-remaining-requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "x-ratelimit-remaining-tokens from LLM API ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 18
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"editorMode": "code",
"expr": "topk(5, sort(litellm_remaining_tokens))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "x-ratelimit-remaining-tokens",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 4,
"panels": [],
"title": "LiteLLM Metrics by Virtual Key and Team",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "Requests per second by Key Alias (keys are LiteLLM Virtual Keys). If key is None - means no Alias Set ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 27
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"editorMode": "code",
"expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m])) by (api_key_alias)\n",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Requests per second by Key Alias",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"description": "Requests per second by Team Alias. If team is None - means no team alias Set ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 27
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0-76761.patch01-77040",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bdiyc60dco54we"
},
"editorMode": "code",
"expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m])) by (team_alias)\n",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Requests per second by Team Alias",
"type": "timeseries"
}
],
"preload": false,
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "LiteLLM Prod v2",
"uid": "be059pwgrlg5cf",
"version": 17,
"weekStart": ""
}

View file

@ -1,14 +0,0 @@
# Contains LiteLLM maintained grafana dashboard
This folder contains the `json` for creating Grafana Dashboards
## [LiteLLM v2 Dashboard](./dashboard_v2)
<img width="1316" alt="grafana_1" src="https://github.com/user-attachments/assets/d0df802d-0cb9-4906-a679-941c547789ab">
<img width="1289" alt="grafana_2" src="https://github.com/user-attachments/assets/b11f755f-e113-42ab-b21d-83f91f451a28">
<img width="1323" alt="grafana_3" src="https://github.com/user-attachments/assets/cb29ffdb-477d-4be1-a5cd-c3f7f2cb21c5">
### Pre-Requisites
- Setup LiteLLM Proxy Prometheus Metrics https://docs.litellm.ai/docs/proxy/prometheus

View file

@ -1,178 +0,0 @@
# liteLLM Proxy Server: 50+ LLM Models, Error Handling, Caching
### Azure, Llama2, OpenAI, Claude, Hugging Face, Replicate Models
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
![4BC6491E-86D0-4833-B061-9F54524B2579](https://github.com/BerriAI/litellm/assets/17561003/f5dd237b-db5e-42e1-b1ac-f05683b1d724)
## What does liteLLM proxy do
- Make `/chat/completions` requests for 50+ LLM models **Azure, OpenAI, Replicate, Anthropic, Hugging Face**
Example: for `model` use `claude-2`, `gpt-3.5`, `gpt-4`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
```json
{
"model": "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
- **Consistent Input/Output** Format
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Lunary`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">
- **Token Usage & Spend** - Track Input + Completion tokens used + Spend/model
- **Caching** - Implementation of Semantic Caching
- **Streaming & Async Support** - Return generators to stream text responses
## API Endpoints
### `/chat/completions` (POST)
This endpoint is used to generate chat completions for 50+ support LLM API Models. Use llama2, GPT-4, Claude2 etc
#### Input
This API endpoint accepts all inputs in raw JSON and expects the following inputs
- `model` (string, required): ID of the model to use for chat completions. See all supported models [here]: (https://litellm.readthedocs.io/en/latest/supported/):
eg `gpt-3.5-turbo`, `gpt-4`, `claude-2`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
- `messages` (array, required): A list of messages representing the conversation context. Each message should have a `role` (system, user, assistant, or function), `content` (message text), and `name` (for function role).
- Additional Optional parameters: `temperature`, `functions`, `function_call`, `top_p`, `n`, `stream`. See the full list of supported inputs here: https://litellm.readthedocs.io/en/latest/input/
#### Example JSON body
For claude-2
```json
{
"model": "claude-2",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
### Making an API request to the Proxy Server
```python
import requests
import json
# TODO: use your URL
url = "http://localhost:5000/chat/completions"
payload = json.dumps({
"model": "gpt-3.5-turbo",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```
### Output [Response Format]
Responses from the server are given in the following format.
All responses from the server are returned in the following format (for all LLM models). More info on output here: https://litellm.readthedocs.io/en/latest/output/
```json
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "I'm sorry, but I don't have the capability to provide real-time weather information. However, you can easily check the weather in San Francisco by searching online or using a weather app on your phone.",
"role": "assistant"
}
}
],
"created": 1691790381,
"id": "chatcmpl-7mUFZlOEgdohHRDx2UpYPRTejirzb",
"model": "gpt-3.5-turbo-0613",
"object": "chat.completion",
"usage": {
"completion_tokens": 41,
"prompt_tokens": 16,
"total_tokens": 57
}
}
```
## Installation & Usage
### Running Locally
1. Clone liteLLM repository to your local machine:
```
git clone https://github.com/BerriAI/liteLLM-proxy
```
2. Install the required dependencies using pip
```
pip install -r requirements.txt
```
3. Set your LLM API keys
```
os.environ['OPENAI_API_KEY]` = "YOUR_API_KEY"
or
set OPENAI_API_KEY in your .env file
```
4. Run the server:
```
python main.py
```
## Deploying
1. Quick Start: Deploy on Railway
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
2. `GCP`, `AWS`, `Azure`
This project includes a `Dockerfile` allowing you to build and deploy a Docker Project on your providers
# Support / Talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Support hosted db (e.g. Supabase)
- [ ] Easily send data to places like posthog and sentry.
- [ ] Add a hot-cache for project spend logs - enables fast checks for user + project limitings
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

File diff suppressed because it is too large Load diff

View file

@ -1,150 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
print(f"File not found: {e}")
except Exception as e:
print(f"An error occurred: {e}")
# for q in questions:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
import time
# Function to make concurrent calls to OpenAI API
def make_openai_completion(question):
try:
start_time = time.time()
import openai
client = openai.OpenAI(
api_key=os.environ["OPENAI_API_KEY"], base_url="http://0.0.0.0:8000"
) # base_url="http://0.0.0.0:8000",
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
# Log the request details
with open("request_log.txt", "a") as log_file:
log_file.write(
f"Question: {question[:100]}\nResponse ID:{response.id} Content:{response.choices[0].message.content[:10]}\nTime: {end_time - start_time:.2f} seconds\n\n"
)
return response
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 100
# List to store the futures of concurrent calls
futures = []
# Make concurrent calls
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
for _ in range(concurrent_calls):
random_question = random.choice(questions)
futures.append(executor.submit(make_openai_completion, random_question))
# Wait for all futures to complete
concurrent.futures.wait(futures)
# Summarize the results
successful_calls = 0
failed_calls = 0
for future in futures:
if future.result() is not None:
successful_calls += 1
else:
failed_calls += 1
print(f"Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
# Display content of the logs
with open("request_log.txt", "r") as log_file:
print("\nRequest Log:\n", log_file.read())
with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())

View file

@ -1,166 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
litellm.set_verbose = False
# os.environ.pop("AZURE_AD_TOKEN")
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
print(f"File not found: {e}")
except Exception as e:
print(f"An error occurred: {e}")
# for q in questions:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
import time
# Function to make concurrent calls to OpenAI API
def make_openai_completion(question):
try:
start_time = time.time()
import requests
data = {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
},
],
}
response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
response = response.json()
end_time = time.time()
# Log the request details
with open("request_log.txt", "a") as log_file:
log_file.write(
f"Question: {question[:100]}\nResponse ID: {response.get('id', 'N/A')} Url: {response.get('url', 'N/A')}\nTime: {end_time - start_time:.2f} seconds\n\n"
)
# polling the url
while True:
try:
url = response["url"]
polling_url = f"http://0.0.0.0:8000{url}"
polling_response = requests.get(polling_url)
polling_response = polling_response.json()
print("\n RESPONSE FROM POLLING JoB", polling_response)
status = polling_response["status"]
if status == "finished":
llm_response = polling_response["result"]
with open("response_log.txt", "a") as log_file:
log_file.write(
f"Response ID: {llm_response.get('id', 'NA')}\nLLM Response: {llm_response}\nTime: {end_time - start_time:.2f} seconds\n\n"
)
break
print(
f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}"
)
time.sleep(0.5)
except Exception as e:
print("got exception in polling", e)
break
return response
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 10
# List to store the futures of concurrent calls
futures = []
# Make concurrent calls
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
for _ in range(concurrent_calls):
random_question = random.choice(questions)
futures.append(executor.submit(make_openai_completion, random_question))
# Wait for all futures to complete
concurrent.futures.wait(futures)
# Summarize the results
successful_calls = 0
failed_calls = 0
for future in futures:
if future.done():
if future.result() is not None:
successful_calls += 1
else:
failed_calls += 1
print(f"Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")

View file

@ -1,145 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
print(f"File not found: {e}")
except Exception as e:
print(f"An error occurred: {e}")
# for q in questions:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
import time
# Function to make concurrent calls to OpenAI API
def make_openai_completion(question):
try:
start_time = time.time()
response = router.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
# Log the request details
with open("request_log.txt", "a") as log_file:
log_file.write(
f"Question: {question[:100]}\nResponse: {response.choices[0].message.content}\nTime: {end_time - start_time:.2f} seconds\n\n"
)
return response
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 150
# List to store the futures of concurrent calls
futures = []
# Make concurrent calls
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
for _ in range(concurrent_calls):
random_question = random.choice(questions)
futures.append(executor.submit(make_openai_completion, random_question))
# Wait for all futures to complete
concurrent.futures.wait(futures)
# Summarize the results
successful_calls = 0
failed_calls = 0
for future in futures:
if future.result() is not None:
successful_calls += 1
else:
failed_calls += 1
print(f"Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
# Display content of the logs
with open("request_log.txt", "r") as log_file:
print("\nRequest Log:\n", log_file.read())
with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())

Some files were not shown because too many files have changed in this diff Show more