version '0.1.341' returns usage across providers

Krrish Dholakia 2023-08-05 12:20:09 -07:00
parent 580918f360
commit 7575d7ea47
9 changed files with 89 additions and 54 deletions
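In short: completion() now builds the same OpenAI-style response object for the OpenAI text-completion, Replicate, Anthropic, and Cohere code paths, including a "usage" block with prompt, completion, and total token counts. A minimal consumer-side sketch (not from the commit; assumes litellm at this version is installed and the relevant provider key, e.g. ANTHROPIC_API_KEY, is set in the environment):

from litellm import completion

messages = [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}]

response = completion(model="claude-instant-1", messages=messages)

# The normalized response shape introduced by this commit:
print(response["choices"][0]["message"]["content"])
print(response["usage"])  # {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}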

View file

@@ -1,14 +1,13 @@
 import os, openai, cohere, replicate, sys
 from typing import Any
 from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-import traceback
 from functools import partial
-import dotenv
-import traceback
+import dotenv, traceback, random, asyncio, time
+from copy import deepcopy
 import litellm
 from litellm import client, logging, exception_type, timeout, get_optional_params
-import random
-import asyncio
+import tiktoken
+encoding = tiktoken.get_encoding("cl100k_base")
 from tenacity import (
     retry,
     stop_after_attempt,
@@ -17,6 +16,17 @@ from tenacity import (
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
+new_response = {
+    "choices": [
+        {
+            "finish_reason": "stop",
+            "index": 0,
+            "message": {
+                "role": "assistant"
+            }
+        }
+    ]
+}
 
 # TODO move this to utils.py
 # TODO add translations
 # TODO see if this worked - model_name == krrish
@@ -44,6 +54,8 @@ def completion(
     *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False
 ):
     try:
+        global new_response
+        model_response = deepcopy(new_response) # deep copy the default response format so we can mutate it and it's thread-safe.
         # check if user passed in any of the OpenAI optional params
         optional_params = get_optional_params(
             functions=functions, function_call=function_call,
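The two lines added here are the core of the change: the module-level response skeleton (new_response, defined above) is deep-copied at the start of every completion() call, so each call fills in its own message content and usage without mutating the shared template. A minimal sketch of the pattern, with hypothetical names outside the litellm codebase:

from copy import deepcopy

# Shared template, mutated only via per-call copies (hypothetical name).
DEFAULT_RESPONSE = {
    "choices": [
        {"finish_reason": "stop", "index": 0, "message": {"role": "assistant"}}
    ]
}

def build_response(content: str, model: str, usage: dict) -> dict:
    # Deep copy so the nested "message" dict is fresh per call; a shallow copy
    # would still share it across concurrent calls.
    resp = deepcopy(DEFAULT_RESPONSE)
    resp["choices"][0]["message"]["content"] = content
    resp["model"] = model
    resp["usage"] = usage
    return resp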
@@ -128,6 +140,15 @@ def completion(
                 model=model,
                 prompt = prompt
             )
+            completion_response = response["choices"][0]["text"]
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = response["created"]
+            model_response["model"] = model
+            model_response["usage"] = response["usage"]
+            response = model_response
         elif "replicate" in model:
             # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
             # checking in case user set it to REPLICATE_API_KEY instead
@@ -153,19 +174,21 @@ def completion(
             response = ""
             for item in output:
                 response += item
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            response = new_response
+            completion_response = response
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = len(encoding.encode(prompt))
+            completion_tokens = len(encoding.encode(completion_response))
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         elif model in litellm.anthropic_models:
             #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
             if api_key:
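Unlike the OpenAI text-completion branch above, which passes through the usage block the API already returns, Replicate (and Cohere further down) does not report token counts, so the commit estimates them with tiktoken's cl100k_base encoding. A standalone sketch of that accounting (estimate_usage is a hypothetical helper, not part of litellm, and cl100k_base is only an approximation of those providers' tokenizers):

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")

def estimate_usage(prompt: str, completion_text: str) -> dict:
    # Token counts are estimates, not provider-reported numbers.
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(encoding.encode(completion_text))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }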
@@ -183,7 +206,6 @@ def completion(
                     prompt += f"{HUMAN_PROMPT}{message['content']}"
             prompt += f"{AI_PROMPT}"
             anthropic = Anthropic()
-            # check if user passed in max_tokens != float('inf')
             if max_tokens != float('inf'):
                 max_tokens_to_sample = max_tokens
             else:
@@ -196,20 +218,22 @@ def completion(
                 prompt=prompt,
                 max_tokens_to_sample=max_tokens_to_sample
             )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": completion.completion,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            print_verbose(f"new response: {new_response}")
-            response = new_response
+            completion_response = completion.completion
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = anthropic.count_tokens(prompt)
+            completion_tokens = anthropic.count_tokens(completion_response)
+            ## RESPONSE OBJECT
+            print(f"model_response: {model_response}")
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         elif model in litellm.cohere_models:
             if api_key:
                 cohere_key = api_key
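For Anthropic the token counts come from the SDK's own tokenizer rather than tiktoken, as the count_tokens calls above show. A minimal sketch, assuming the anthropic 0.3.x client pinned in pyproject.toml (where count_tokens is a client method) and ANTHROPIC_API_KEY in the environment:

from anthropic import Anthropic

anthropic = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
prompt_tokens = anthropic.count_tokens("\n\nHuman: Hello, world\n\nAssistant:")
print(prompt_tokens)  # an int, used to build the "usage" block above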
@@ -226,19 +250,21 @@ def completion(
                 model=model,
                 prompt = prompt
             )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response[0].text,
-                            "role": "assistant"
-                        }
-                    }
-                ],
-            }
-            response = new_response
+            completion_response = response[0].text
+            ## LOGGING
+            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+            prompt_tokens = len(encoding.encode(prompt))
+            completion_tokens = len(encoding.encode(completion_response))
+            ## RESPONSE OBJECT
+            model_response["choices"][0]["message"]["content"] = completion_response
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            response = model_response
         else:
             ## LOGGING
             logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)

View file

@@ -8,11 +8,14 @@ import pytest
 import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+# litellm.set_verbose = True
 
 user_message = "Hello, whats the weather in San Francisco??"
 messages = [{ "content": user_message,"role": "user"}]
 
+def logger_fn(user_model_dict):
+    print(f"user_model_dict: {user_model_dict}")
+
 def test_completion_openai():
     try:
         response = completion(model="gpt-3.5-turbo", messages=messages)
@@ -83,7 +86,7 @@ def test_completion_azure():
 def test_completion_claude():
     try:
-        response = completion(model="claude-instant-1", messages=messages)
+        response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
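The logger_fn argument exercised here is a plain callable: completion() invokes it with a dict of the call details it logs internally, and with the utils.py changes further down that dict also carries an "additional_args" entry (for example the provider's original response) when one is present. A hedged usage sketch outside the test file:

from litellm import completion

def logger_fn(model_call_dict):
    # Receives the call-details dict litellm builds for logging
    # (model, input, and now "additional_args" when present).
    print(f"model call details: {model_call_dict}")

messages = [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}]
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)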
@@ -97,7 +100,8 @@ def test_completion_cohere():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-
+# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
+# [TODO] improve our try-except block to handle for these
 # def test_completion_replicate_llama():
 #     model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
 #     try:

View file

@@ -33,13 +33,16 @@ def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=N
     if azure:
         model_call_details["azure"] = azure
     if exception:
-        model_call_details["original_exception"] = exception
+        model_call_details["exception"] = exception
 
     if litellm.telemetry:
         safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
 
     if input:
         model_call_details["input"] = input
+
+    if len(additional_args):
+        model_call_details["additional_args"] = additional_args
     # log additional call details -> api key, etc.
     if model:
         if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
@@ -53,7 +56,6 @@ def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=N
             model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
         elif model in litellm.cohere_models:
             model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-    model_call_details["additional_args"] = additional_args
     ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
     print_verbose(f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}")
     if logger_fn and callable(logger_fn):
@@ -318,6 +320,7 @@ def exception_type(model, original_exception):
             exception_type = type(original_exception).__name__
         else:
             exception_type = ""
+        logging(model=model, additional_args={"error_str": error_str, "exception_type": exception_type, "original_exception": original_exception}, logger_fn=user_logger_fn)
         if "claude" in model: #one of the anthropics
             if "status_code" in original_exception:
                 print_verbose(f"status_code: {original_exception.status_code}")
@@ -357,7 +360,7 @@ def exception_type(model, original_exception):
             raise original_exception
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, additional_args={"original_exception": original_exception}, exception=e)
+        logging(logger_fn=user_logger_fn, additional_args={"exception_mapping_worked": exception_mapping_worked, "original_exception": original_exception}, exception=e)
         if exception_mapping_worked:
             raise e
         else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
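With these utils.py changes, failures flow through the same logging() helper as successful calls, so a user-supplied logger_fn can observe them too. A hedged sketch of what such a callback might receive on error, based only on the keys written in this diff ("exception", plus an "additional_args" dict with "error_str", "exception_type", and "original_exception"):

def logger_fn(model_call_dict):
    # On a failed call, logging() passes the exception and extra context along.
    if "exception" in model_call_dict:
        extra = model_call_dict.get("additional_args", {})
        print(f"call failed: {model_call_dict['exception']} ({extra.get('exception_type')})")
    else:
        print(f"call details: {model_call_dict}")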

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.331"
+version = "0.1.341"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -15,6 +15,7 @@ anthropic = "^0.3.7"
 replicate = "^0.10.0"
 python-dotenv = "^1.0.0"
 tenacity = "^8.0.1"
+tiktoken = "^0.4.0"
 
 [build-system]
 requires = ["poetry-core"]

View file

@@ -6,3 +6,4 @@ pytest
 python-dotenv
 openai[datalib]
 tenacity
+tiktoken