forked from phoenix/litellm-mirror
version '0.1.341' returns usage across providers
commit 7575d7ea47 (parent 580918f360)
9 changed files with 89 additions and 54 deletions
3 binary files changed (not shown).
litellm/main.py (118 changed lines)
@@ -1,14 +1,13 @@
 import os, openai, cohere, replicate, sys
 from typing import Any
 from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-import traceback
 from functools import partial
-import dotenv
-import traceback
+import dotenv, traceback, random, asyncio, time
+from copy import deepcopy
 import litellm
 from litellm import client, logging, exception_type, timeout, get_optional_params
-import random
-import asyncio
+import tiktoken
+encoding = tiktoken.get_encoding("cl100k_base")
 from tenacity import (
     retry,
     stop_after_attempt,
@@ -17,6 +16,17 @@ from tenacity import (
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv

+new_response = {
+    "choices": [
+        {
+            "finish_reason": "stop",
+            "index": 0,
+            "message": {
+                "role": "assistant"
+            }
+        }
+    ]
+}
 # TODO move this to utils.py
 # TODO add translations
 # TODO see if this worked - model_name == krrish
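The new module-level new_response template mirrors the OpenAI chat-completion shape, so every provider branch below can fill in the same fields and callers read the result uniformly. For orientation, a hedged sketch of what a filled-in copy looks like after a non-OpenAI branch runs (the keys come from this diff; the values are illustrative, not from a real call):

# Illustrative only: keys match the diff, values are made up.
model_response = {
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {"role": "assistant", "content": "Hi! I can't check live weather."},
        }
    ],
    "created": 1690000000.0,  # time.time() is used for providers that don't return a timestamp
    "model": "claude-instant-1",
    "usage": {"prompt_tokens": 12, "completion_tokens": 9, "total_tokens": 21},
}
print(model_response["choices"][0]["message"]["content"])
print(model_response["usage"]["total_tokens"])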
@@ -44,6 +54,8 @@ def completion(
     *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False
 ):
     try:
+        global new_response
+        model_response = deepcopy(new_response) # deep copy the default response format so we can mutate it and it's thread-safe.
         # check if user passed in any of the OpenAI optional params
         optional_params = get_optional_params(
             functions=functions, function_call=function_call,
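Why the deepcopy: the template is a shared module-level dict, so mutating it directly would leak one call's content into the next. A minimal standalone sketch of the pattern (not litellm's code; names here are hypothetical):

from copy import deepcopy

RESPONSE_TEMPLATE = {  # stand-in for the module-level new_response above
    "choices": [{"finish_reason": "stop", "index": 0, "message": {"role": "assistant"}}]
}

def build_response(text):
    # Each call mutates its own deep copy, never the shared template.
    model_response = deepcopy(RESPONSE_TEMPLATE)
    model_response["choices"][0]["message"]["content"] = text
    return model_response

first = build_response("hello")
second = build_response("world")
assert first["choices"][0]["message"]["content"] == "hello"
assert "content" not in RESPONSE_TEMPLATE["choices"][0]["message"]  # template stays pristine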
@@ -128,6 +140,15 @@ def completion(
                 model=model,
                 prompt = prompt
             )
+            completion_response = response["choices"]["text"]
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = response["created"]
+            model_response["model"] = model
+            model_response["usage"] = response["usage"]
+            response = model_response
         elif "replicate" in model:
             # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
             # checking in case user set it to REPLICATE_API_KEY instead
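For comparison, with the pre-1.0 openai SDK the text-completion response keeps choices in a list and already carries provider-reported usage, so no local token counting is needed on this path. A hedged sketch of reading both (assumes OPENAI_API_KEY is set; the model name is only an example):

import openai  # pre-1.0 SDK, matching the openai[datalib] pin in requirements.txt

response = openai.Completion.create(model="text-davinci-003", prompt="Say hello in one word.")
completion_text = response["choices"][0]["text"]  # choices is a list, so index it first
usage = response["usage"]  # OpenAI reports prompt/completion/total token counts itself
print(completion_text, usage["total_tokens"])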
@@ -153,19 +174,21 @@ def completion(
             response = ""
             for item in output:
                 response += item
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            response = new_response
+            completion_response = response
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = len(encoding.encode(prompt))
+            completion_tokens = len(encoding.encode(completion_response))
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         elif model in litellm.anthropic_models:
             #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
             if api_key:
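Replicate doesn't return token counts here, so the branch above approximates usage locally with tiktoken's cl100k_base encoding. A standalone sketch of that approximation (cl100k_base is an OpenAI tokenizer, so for non-OpenAI models these counts are estimates rather than the provider's own accounting):

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")

def approximate_usage(prompt: str, completion_text: str) -> dict:
    # Token counts computed client-side; an estimate, not provider-reported usage.
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(encoding.encode(completion_text))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

print(approximate_usage("Hello, whats the weather in San Francisco??", "I can't check live weather."))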
@@ -183,7 +206,6 @@ def completion(
                 prompt += f"{HUMAN_PROMPT}{message['content']}"
             prompt += f"{AI_PROMPT}"
             anthropic = Anthropic()
-            # check if user passed in max_tokens != float('inf')
             if max_tokens != float('inf'):
                 max_tokens_to_sample = max_tokens
             else:
@@ -196,20 +218,22 @@ def completion(
                 prompt=prompt,
                 max_tokens_to_sample=max_tokens_to_sample
             )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": completion.completion,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            print_verbose(f"new response: {new_response}")
-            response = new_response
+            completion_response = completion.completion
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = anthropic.count_tokens(prompt)
+            completion_tokens = anthropic.count_tokens(completion_response)
+            ## RESPONSE OBJECT
+            print(f"model_response: {model_response}")
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         elif model in litellm.cohere_models:
             if api_key:
                 cohere_key = api_key
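For Anthropic models the branch above counts tokens with the SDK's own helper rather than tiktoken. A hedged sketch against the anthropic pin in this project (^0.3.7), where the client exposes count_tokens(); it assumes ANTHROPIC_API_KEY is set, and newer SDK versions changed this API:

from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT

client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
prompt = f"{HUMAN_PROMPT} Hello, whats the weather in San Francisco??{AI_PROMPT}"
completion_text = "I can't check live weather, but San Francisco is often foggy."

usage = {
    "prompt_tokens": client.count_tokens(prompt),
    "completion_tokens": client.count_tokens(completion_text),
}
usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
print(usage)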
@@ -226,19 +250,21 @@ def completion(
                 model=model,
                 prompt = prompt
             )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response[0].text,
-                            "role": "assistant"
-                        }
-                    }
-                ],
-            }
-            response = new_response
+            completion_response = response[0].text
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = len(encoding.encode(prompt))
+            completion_tokens = len(encoding.encode(completion_response))
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         else:
             ## LOGGING
             logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
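Taken together, every branch of completion() now fills in a usage block in the same place, which is the point of this release. A quick sketch of what a caller can rely on after this change (assumes the relevant provider API keys are set; the model names are examples drawn from this repo's tests and model lists):

from litellm import completion

messages = [{"content": "Hello, whats the weather in San Francisco??", "role": "user"}]

for model in ["gpt-3.5-turbo", "claude-instant-1", "command-nightly"]:
    response = completion(model=model, messages=messages)
    usage = response["usage"]  # populated for every provider after this change
    print(model, usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])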
1 binary file changed (not shown).
litellm/tests/test_completion.py (file name not shown in the extracted diff; inferred from the test contents)
@@ -8,11 +8,14 @@ import pytest
 import litellm
 from litellm import embedding, completion

-litellm.set_verbose = True
+# litellm.set_verbose = True

 user_message = "Hello, whats the weather in San Francisco??"
 messages = [{ "content": user_message,"role": "user"}]

+def logger_fn(user_model_dict):
+    print(f"user_model_dict: {user_model_dict}")
+
 def test_completion_openai():
     try:
         response = completion(model="gpt-3.5-turbo", messages=messages)
@@ -83,7 +86,7 @@ def test_completion_azure():

 def test_completion_claude():
     try:
-        response = completion(model="claude-instant-1", messages=messages)
+        response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
@@ -97,7 +100,8 @@ def test_completion_cohere():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
+# [TODO] improve our try-except block to handle for these
 # def test_completion_replicate_llama():
 #     model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
 #     try:
litellm/utils.py (file name not shown in the extracted diff; inferred from the function contents)
@@ -33,13 +33,16 @@ def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=N
     if azure:
         model_call_details["azure"] = azure
     if exception:
-        model_call_details["original_exception"] = exception
+        model_call_details["exception"] = exception

     if litellm.telemetry:
         safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.

     if input:
         model_call_details["input"] = input

+    if len(additional_args):
+        model_call_details["additional_args"] = additional_args
     # log additional call details -> api key, etc.
     if model:
         if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
@@ -53,7 +56,6 @@ def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=N
             model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
         elif model in litellm.cohere_models:
             model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-        model_call_details["additional_args"] = additional_args
     ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
     print_verbose(f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}")
     if logger_fn and callable(logger_fn):
@@ -318,6 +320,7 @@ def exception_type(model, original_exception):
         exception_type = type(original_exception).__name__
     else:
         exception_type = ""
+    logging(model=model, additional_args={"error_str": error_str, "exception_type": exception_type, "original_exception": original_exception}, logger_fn=user_logger_fn)
     if "claude" in model: #one of the anthropics
         if "status_code" in original_exception:
             print_verbose(f"status_code: {original_exception.status_code}")
@@ -357,7 +360,7 @@ def exception_type(model, original_exception):
             raise original_exception
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, additional_args={"original_exception": original_exception}, exception=e)
+        logging(logger_fn=user_logger_fn, additional_args={"exception_mapping_worked": exception_mapping_worked, "original_exception": original_exception}, exception=e)
         if exception_mapping_worked:
             raise e
         else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.331"
+version = "0.1.341"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -15,6 +15,7 @@ anthropic = "^0.3.7"
 replicate = "^0.10.0"
 python-dotenv = "^1.0.0"
 tenacity = "^8.0.1"
+tiktoken = "^0.4.0"

 [build-system]
 requires = ["poetry-core"]
requirements.txt
@@ -5,4 +5,5 @@ replicate
 pytest
 python-dotenv
 openai[datalib]
 tenacity
+tiktoken