add linting

ishaan-jaff 2023-08-18 11:05:05 -07:00
parent 8ef47524bf
commit 15b1da9dc8
40 changed files with 3110 additions and 1709 deletions

View file

@@ -1,16 +1,17 @@
import threading
success_callback = []
failure_callback = []
set_verbose = False
telemetry = True
max_tokens = 256  # OpenAI Defaults
retry = True
api_key = None
openai_key = None
azure_key = None
anthropic_key = None
replicate_key = None
cohere_key = None
openrouter_key = None
huggingface_key = None
vertex_project = None
@@ -19,33 +20,99 @@ caching = False
hugging_api_token = None
togetherai_api_key = None
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"
_thread_context = MyLocal()
def identify(event_details):
    # Store user in thread local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]
####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
@@ -56,60 +123,48 @@ config_path = None
secret_manager_client = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
    "gpt-4",
    "gpt-4-0613",
    "gpt-4-32k",
    "gpt-4-32k-0613",
    #################
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = ["text-davinci-003"]
cohere_models = [
    "command-nightly",
    "command",
    "command-light",
    "command-medium-beta",
    "command-xlarge-beta",
]
anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
replicate_models = [
    "replicate/"
]  # placeholder, to make sure we accept any replicate model in our model_list
openrouter_models = [
    "google/palm-2-codechat-bison",
    "google/palm-2-chat-bison",
    "openai/gpt-3.5-turbo",
    "openai/gpt-3.5-turbo-16k",
    "openai/gpt-4-32k",
    "anthropic/claude-2",
    "anthropic/claude-instant-v1",
    "meta-llama/llama-2-13b-chat",
    "meta-llama/llama-2-70b-chat",
]
vertex_chat_models = ["chat-bison", "chat-bison@001"]
vertex_text_models = ["text-bison", "text-bison@001"]
huggingface_models = [
    "meta-llama/Llama-2-7b-hf",
@@ -124,25 +179,56 @@ huggingface_models = [
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
)
provider_list = [
    "openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "ai21",
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = ["text-embedding-ada-002"]
from .timeout import timeout
from .testing import *
from .utils import (
    client,
    logging,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
)
from .main import *  # Import all the symbols from main.py
from .integrations import *
from openai.error import (
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)
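Example (not part of the diff): a minimal sketch of how the `model_cost` map and the thread-local `identify` helper above might be used; the helper function and token counts are illustrative, and `litellm` is assumed to be importable as shown.

import litellm

# Tag this thread's requests with a user id (stored via the MyLocal thread-local above).
litellm.identify({"user": "user-1234"})

# Estimate the dollar cost of one call from the per-token prices in model_cost.
def estimate_cost(model, usage):
    prices = litellm.model_cost[model]
    return (
        prices["input_cost_per_token"] * usage["prompt_tokens"]
        + prices["output_cost_per_token"] * usage["completion_tokens"]
    )

print(estimate_cost("gpt-3.5-turbo", {"prompt_tokens": 100, "completion_tokens": 50}))
# 100 * 0.0000015 + 50 * 0.000002 = 0.00025 (USD)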

View file

@@ -1,12 +1,21 @@
## LiteLLM versions of the OpenAI Exception Types
from openai.error import (
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)
class AuthenticationError(AuthenticationError):
    def __init__(self, message, llm_provider):
        self.status_code = 401
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class InvalidRequestError(InvalidRequestError):
@@ -15,7 +24,9 @@ class InvalidRequestError(InvalidRequestError):
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, f"{self.model}"
        )  # Call the base class constructor with the parameters it needs
class RateLimitError(RateLimitError):
@@ -23,21 +34,29 @@ class RateLimitError(RateLimitError):
        self.status_code = 429
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class ServiceUnavailableError(ServiceUnavailableError):
    def __init__(self, message, llm_provider):
        self.status_code = 500
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class OpenAIError(OpenAIError):
    def __init__(self, original_exception):
        self.status_code = original_exception.http_status
        super().__init__(
            http_body=original_exception.http_body,
            http_status=original_exception.http_status,
            json_body=original_exception.json_body,
            headers=original_exception.headers,
            code=original_exception.code,
        )
        self.llm_provider = "openai"
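Example (not part of the diff): a minimal sketch of catching these wrapped exception types; the import path is assumed and the failing call is simulated.

from litellm.exceptions import AuthenticationError, RateLimitError  # module path assumed

def call_provider():
    # stand-in for a provider call that fails with a bad key (illustrative)
    raise AuthenticationError(message="invalid api key", llm_provider="anthropic")

try:
    call_provider()
except RateLimitError as e:
    print("rate limited, retry later:", e.status_code)  # would print 429
except AuthenticationError as e:
    print(e.status_code, e.llm_provider, e.message)  # 401 anthropic invalid api key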

View file

@@ -1 +1 @@
from . import *

View file

@@ -1,53 +1,121 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
class AISpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
        self.api_key = os.getenv("AISPEND_API_KEY")
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
        # else default to the average of the costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate average input cost
            input_cost_sum = 0
            output_cost_sum = 0
            for model in model_cost:
@@ -55,37 +123,52 @@ class AISpendLogger:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            print_verbose(
                f"AISpend Logging - Enters logging function for model {model}"
            )
            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }
            response_timestamp = datetime.datetime.fromtimestamp(
                int(response_obj["created"])
            ).strftime("%Y-%m-%d")
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [
                {
                    "requests": 1,
                    "requests_context": 1,
                    "context_tokens": response_obj["usage"]["prompt_tokens"],
                    "requests_generated": 1,
                    "generated_tokens": response_obj["usage"]["completion_tokens"],
                    "recorded_date": response_timestamp,
                    "model_id": response_obj["model"],
                    "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
                    "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
                }
            ]
            print_verbose(f"AISpend Logging - final data object: {data}")
        except:
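Example (not part of the diff): what `price_calculator` returns for a model that is present in `model_cost`; the response object is illustrative and only the fields the method reads are filled in.

logger = AISpendLogger()  # reads AISPEND_ACCOUNT_ID / AISPEND_API_KEY from the environment

response_obj = {"usage": {"prompt_tokens": 1000, "completion_tokens": 200}}
prompt_cost, completion_cost = logger.price_calculator(
    "gpt-4", response_obj, start_time=None, end_time=None  # times unused for this branch
)
# prompt_cost     = 1000 * 0.000003 = 0.003 (USD)
# completion_cost =  200 * 0.00006  = 0.012 (USD)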

View file

@@ -1,52 +1,120 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
class BerriSpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
        # else default to the average of the costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate average input cost
            input_cost_sum = 0
            output_cost_sum = 0
            for model in model_cost:
@@ -54,42 +122,59 @@ class BerriSpendLogger:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        # Method definition
        try:
            print_verbose(
                f"BerriSpend Logging - Enters logging function for model {model}"
            )
            url = f"https://berrispend.berri.ai/spend"
            headers = {"Content-Type": "application/json"}
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )
            response_time = (end_time - start_time).total_seconds()
            if "response" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "response": response_obj["choices"][0]["message"]["content"],
                        "account_id": self.account_id,
                    }
                ]
            elif "error" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "error": response_obj["error"],
                        "account_id": self.account_id,
                    }
                ]
            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
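Example (not part of the diff): a small worked example of the `response_time` and `total_cost` values computed above; the timestamps and per-token costs are illustrative.

import datetime

start_time = datetime.datetime(2023, 8, 18, 11, 0, 0)
end_time = datetime.datetime(2023, 8, 18, 11, 0, 2, 500000)
response_time = (end_time - start_time).total_seconds()  # 2.5

prompt_tokens_cost_usd_dollar = 0.003
completion_tokens_cost_usd_dollar = 0.012
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar  # 0.015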

View file

@@ -2,19 +2,24 @@
# On success, logs events to Helicone
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
class HeliconeLogger:
    # Class variables or attributes
    helicone_model_list = ["gpt", "claude"]
    def __init__(self):
        # Instance variables
        self.provider_url = "https://api.openai.com/v1"
        self.key = os.getenv("HELICONE_API_KEY")
    def claude_mapping(self, model, messages, response_obj):
        from anthropic import HUMAN_PROMPT, AI_PROMPT
        prompt = f"{HUMAN_PROMPT}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
@@ -26,48 +31,84 @@ class HeliconeLogger:
        prompt += f"{AI_PROMPT}"
        claude_provider_request = {"model": model, "prompt": prompt}
        claude_response_obj = {
            "completion": response_obj["choices"][0]["message"]["content"],
            "model": model,
            "stop_reason": "stop_sequence",
        }
        return claude_provider_request, claude_response_obj
    def log_success(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        # Method definition
        try:
            print_verbose(
                f"Helicone Logging - Enters logging function for model {model}"
            )
            model = (
                model
                if any(
                    accepted_model in model
                    for accepted_model in self.helicone_model_list
                )
                else "gpt-3.5-turbo"
            )
            provider_request = {"model": model, "messages": messages}
            if "claude" in model:
                provider_request, response_obj = self.claude_mapping(
                    model=model, messages=messages, response_obj=response_obj
                )
            providerResponse = {
                "json": response_obj,
                "headers": {"openai-version": "2020-10-01"},
                "status": 200,
            }
            # Code to be executed
            url = "https://api.hconeai.com/oai/v1/log"
            headers = {
                "Authorization": f"Bearer {self.key}",
                "Content-Type": "application/json",
            }
            start_time_seconds = int(start_time.timestamp())
            start_time_milliseconds = int(
                (start_time.timestamp() - start_time_seconds) * 1000
            )
            end_time_seconds = int(end_time.timestamp())
            end_time_milliseconds = int(
                (end_time.timestamp() - end_time_seconds) * 1000
            )
            data = {
                "providerRequest": {
                    "url": self.provider_url,
                    "json": provider_request,
                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
                },
                "providerResponse": providerResponse,
                "timing": {
                    "startTime": {
                        "seconds": start_time_seconds,
                        "milliseconds": start_time_milliseconds,
                    },
                    "endTime": {
                        "seconds": end_time_seconds,
                        "milliseconds": end_time_milliseconds,
                    },
                },  # {"seconds": .., "milliseconds": ..}
            }
            response = requests.post(url, headers=headers, json=data)
            if response.status_code == 200:
                print_verbose("Helicone Logging - Success!")
            else:
                print_verbose(
                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
                )
                print_verbose(f"Helicone Logging - Error {response.text}")
        except:
            # traceback.print_exc()
            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
            pass
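Example (not part of the diff): a small worked example of the seconds/milliseconds split used for the Helicone timing payload above; the datetime value is illustrative.

import datetime

start_time = datetime.datetime(2023, 8, 18, 11, 5, 5, 250000)
ts = start_time.timestamp()
start_time_seconds = int(ts)                                     # whole seconds since the epoch
start_time_milliseconds = int((ts - start_time_seconds) * 1000)  # 250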

View file

@@ -3,31 +3,94 @@
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
class Supabase:
    # Class variables or attributes
    supabase_table_name = "request_logs"
    def __init__(self):
        # Instance variables
        self.supabase_url = os.getenv("SUPABASE_URL")
@@ -35,9 +98,11 @@ class Supabase:
        try:
            import supabase
        except ImportError:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
            import supabase
        self.supabase_client = supabase.create_client(
            self.supabase_url, self.supabase_key
        )
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
@@ -45,17 +110,23 @@ class Supabase:
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate average input cost
            input_cost_sum = 0
            output_cost_sum = 0
            for model in model_cost:
@@ -63,41 +134,75 @@ class Supabase:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(
        self,
        model,
        messages,
        end_user,
        response_obj,
        start_time,
        end_time,
        print_verbose,
    ):
        try:
            print_verbose(
                f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
            )
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )
            response_time = (end_time - start_time).total_seconds()
            if "choices" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj["choices"][0]["message"]["content"],
                    "end_user": end_user,
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
                )
                data, count = (
                    self.supabase_client.table(self.supabase_table_name)
                    .insert(supabase_data_obj)
                    .execute()
                )
            elif "error" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj["error"],
                    "end_user": end_user,
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
                )
                data, count = (
                    self.supabase_client.table(self.supabase_table_name)
                    .insert(supabase_data_obj)
                    .execute()
                )
        except:
            # traceback.print_exc()
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")

View file

@@ -1 +1 @@
from . import *

View file

@@ -2,54 +2,77 @@ import os, json
from enum import Enum
import requests
from litellm import logging
import time
from typing import Callable
from litellm.utils import ModelResponse
class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman:"
    AI_PROMPT = "\n\nAssistant:"
class AnthropicError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class AnthropicLLM:
    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
        self.completion_url = "https://api.anthropic.com/v1/complete"
        self.api_key = api_key
        self.validate_environment(api_key=api_key)
    def validate_environment(
        self, api_key
    ):  # set up the environment required to run the model
        # set the api key
        if self.api_key == None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        self.api_key = api_key
        self.headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": self.api_key,
        }
    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        model = model
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
                    prompt += (
                        f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
                    )
                else:
                    prompt += (
                        f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
                    )
            else:
                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
        if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
            "inf"
        ):
            max_tokens = optional_params["max_tokens"]
        else:
            max_tokens = self.default_max_tokens_to_sample
@@ -57,39 +80,66 @@ class AnthropicLLM:
            "model": model,
            "prompt": prompt,
            "max_tokens_to_sample": max_tokens,
            **optional_params,
        }
        ## LOGGING
        logging(
            model=model,
            input=prompt,
            additional_args={
                "litellm_params": litellm_params,
                "optional_params": optional_params,
            },
            logger_fn=logger_fn,
        )
        ## COMPLETION CALL
        response = requests.post(
            self.completion_url, headers=self.headers, data=json.dumps(data)
        )
        if "stream" in optional_params and optional_params["stream"] == True:
            return response.iter_lines()
        else:
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                additional_args={
                    "litellm_params": litellm_params,
                    "optional_params": optional_params,
                    "original_response": response.text,
                },
                logger_fn=logger_fn,
            )
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
            if "error" in completion_response:
                raise AnthropicError(
                    message=completion_response["error"],
                    status_code=response.status_code,
                )
            else:
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response["completion"]
            ## CALCULATING USAGE
            prompt_tokens = len(
                self.encoding.encode(prompt)
            )  ##[TODO] use the anthropic tokenizer here
            completion_tokens = len(
                self.encoding.encode(model_response["choices"][0]["message"]["content"])
            )  ##[TODO] use the anthropic tokenizer here
            model_response["created"] = time.time()
            model_response["model"] = model
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            return model_response
    def embedding():  # logic for parsing in - calling - parsing out model embedding calls
        pass
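Example (not part of the diff): a small worked example of the prompt string that the loop in completion() above builds from a chat-style messages list.

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What is LiteLLM?"},
]
# Following the loop above, the prompt sent to the API becomes:
# "\n\nHuman:" + "\n\nHuman:Hi" + "\n\nAssistant:Hello!" + "\n\nHuman:What is LiteLLM?" + "\n\nAssistant:"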

View file

@@ -1,11 +1,12 @@
## This is a template base class to be used for adding new LLM providers via API calls
class BaseLLM:
    def validate_environment():  # set up the environment required to run the model
        pass
    def completion():  # logic for parsing in - calling - parsing out model completion calls
        pass
    def embedding():  # logic for parsing in - calling - parsing out model embedding calls
        pass

View file

@@ -3,31 +3,47 @@ import os, json
from enum import Enum
import requests
from litellm import logging
import time
from typing import Callable
from litellm.utils import ModelResponse
class HuggingfaceError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class HuggingfaceRestAPILLM:
    def __init__(self, encoding, api_key=None) -> None:
        self.encoding = encoding
        self.validate_environment(api_key=api_key)
    def validate_environment(
        self, api_key
    ):  # set up the environment required to run the model
        self.headers = {
            "content-type": "application/json",
        }
        # get the api key if it exists in the environment or is passed in, but don't require it
        self.api_key = api_key
        if self.api_key != None:
            self.headers["Authorization"] = f"Bearer {self.api_key}"
    def completion(
        self,
        model: str,
        messages: list,
        custom_api_base: str,
        model_response: ModelResponse,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        if custom_api_base:
            completion_url = custom_api_base
        elif "HF_API_BASE" in os.environ:
@ -35,7 +51,9 @@ class HuggingfaceRestAPILLM():
else: else:
completion_url = f"https://api-inference.huggingface.co/models/{model}" completion_url = f"https://api-inference.huggingface.co/models/{model}"
prompt = "" prompt = ""
if "meta-llama" in model and "chat" in model: # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2 if (
"meta-llama" in model and "chat" in model
): # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
prompt = "<s>" prompt = "<s>"
for message in messages: for message in messages:
if message["role"] == "system": if message["role"] == "system":
@ -47,8 +65,8 @@ class HuggingfaceRestAPILLM():
else: else:
for message in messages: for message in messages:
prompt += f"{message['content']}" prompt += f"{message['content']}"
### MAP INPUT PARAMS ### MAP INPUT PARAMS
# max tokens # max tokens
if "max_tokens" in optional_params: if "max_tokens" in optional_params:
value = optional_params.pop("max_tokens") value = optional_params.pop("max_tokens")
optional_params["max_new_tokens"] = value optional_params["max_new_tokens"] = value
@ -57,14 +75,33 @@ class HuggingfaceRestAPILLM():
# "parameters": optional_params # "parameters": optional_params
} }
## LOGGING ## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn) logging(
model=model,
input=prompt,
additional_args={
"litellm_params": litellm_params,
"optional_params": optional_params,
},
logger_fn=logger_fn,
)
## COMPLETION CALL ## COMPLETION CALL
response = requests.post(completion_url, headers=self.headers, data=json.dumps(data)) response = requests.post(
completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True: if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines() return response.iter_lines()
else: else:
## LOGGING ## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn) logging(
model=model,
input=prompt,
additional_args={
"litellm_params": litellm_params,
"optional_params": optional_params,
"original_response": response.text,
},
logger_fn=logger_fn,
)
print_verbose(f"raw model_response: {response.text}") print_verbose(f"raw model_response: {response.text}")
## RESPONSE OBJECT ## RESPONSE OBJECT
completion_response = response.json() completion_response = response.json()
@ -72,24 +109,32 @@ class HuggingfaceRestAPILLM():
if isinstance(completion_response, dict) and "error" in completion_response: if isinstance(completion_response, dict) and "error" in completion_response:
print_verbose(f"completion error: {completion_response['error']}") print_verbose(f"completion error: {completion_response['error']}")
print_verbose(f"response.status_code: {response.status_code}") print_verbose(f"response.status_code: {response.status_code}")
raise HuggingfaceError(message=completion_response["error"], status_code=response.status_code) raise HuggingfaceError(
message=completion_response["error"],
status_code=response.status_code,
)
else: else:
model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"] model_response["choices"][0]["message"][
"content"
] = completion_response[0]["generated_text"]
## CALCULATING USAGE ## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the llama2 tokenizer here prompt_tokens = len(
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the llama2 tokenizer here self.encoding.encode(prompt)
) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the llama2 tokenizer here
model_response["created"] = time.time() model_response["created"] = time.time()
model_response["model"] = model model_response["model"] = model
model_response["usage"] = { model_response["usage"] = {
"prompt_tokens": prompt_tokens, "prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens, "completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens "total_tokens": prompt_tokens + completion_tokens,
} }
return model_response return model_response
pass pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass pass
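A usage sketch for the handler above, not taken from this diff: it assumes tiktoken's cl100k_base as a stand-in tokenizer, an HF_TOKEN environment variable, and that ModelResponse() can be constructed with its default choices/message scaffold. The real call path goes through litellm.completion, which supplies these arguments itself.

import os
import tiktoken
from litellm.utils import ModelResponse

hf_llm = HuggingfaceRestAPILLM(
    encoding=tiktoken.get_encoding("cl100k_base"),  # assumed stand-in tokenizer
    api_key=os.environ.get("HF_TOKEN"),  # assumed env var name
)
response = hf_llm.completion(
    model="meta-llama/Llama-2-7b-chat-hf",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    custom_api_base=None,
    model_response=ModelResponse(),  # assumed to carry the default choices/message scaffold
    print_verbose=print,
    optional_params={"max_tokens": 64},
    litellm_params={},
    logger_fn=None,
)
print(response["usage"])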

File diff suppressed because it is too large

View file

@@ -1,53 +1,82 @@
import litellm
import time
from concurrent.futures import ThreadPoolExecutor
import traceback


def testing_batch_completion(*args, **kwargs):
    try:
        batch_models = (
            args[0] if len(args) > 0 else kwargs.pop("models")
        )  ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
        batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
        results = []
        completions = []
        exceptions = []
        times = []
        with ThreadPoolExecutor() as executor:
            for model in batch_models:
                kwargs_modified = dict(kwargs)
                args_modified = list(args)
                if len(args) > 0:
                    args_modified[0] = model["model"]
                else:
                    kwargs_modified["model"] = (
                        model["model"]
                        if isinstance(model, dict) and "model" in model
                        else model
                    )  # if model is a dictionary get it's value else assume it's a string
                    kwargs_modified["custom_llm_provider"] = (
                        model["custom_llm_provider"]
                        if isinstance(model, dict) and "custom_llm_provider" in model
                        else None
                    )
                    kwargs_modified["custom_api_base"] = (
                        model["custom_api_base"]
                        if isinstance(model, dict) and "custom_api_base" in model
                        else None
                    )
                for message_list in batch_messages:
                    if len(args) > 1:
                        args_modified[1] = message_list
                        future = executor.submit(
                            litellm.completion, *args_modified, **kwargs_modified
                        )
                    else:
                        kwargs_modified["messages"] = message_list
                        future = executor.submit(
                            litellm.completion, *args_modified, **kwargs_modified
                        )
                    completions.append((future, message_list))

        # Retrieve the results and calculate elapsed time for each completion call
        for completion in completions:
            future, message_list = completion
            start_time = time.time()
            try:
                result = future.result()
                end_time = time.time()
                elapsed_time = end_time - start_time
                result_dict = {
                    "status": "succeeded",
                    "response": future.result(),
                    "prompt": message_list,
                    "response_time": elapsed_time,
                }
                results.append(result_dict)
            except Exception as e:
                end_time = time.time()
                elapsed_time = end_time - start_time
                result_dict = {
                    "status": "failed",
                    "response": e,
                    "response_time": elapsed_time,
                }
                results.append(result_dict)
        return results
    except:
        traceback.print_exc()


def duration_test_model(original_function):
    def wrapper_function(*args, **kwargs):
@@ -70,22 +99,39 @@ def duration_test_model(original_function):
    # Return the wrapper function
    return wrapper_function


@duration_test_model
def load_test_model(models: list, prompt: str = None, num_calls: int = None):
    test_calls = 100
    if num_calls:
        test_calls = num_calls
    input_prompt = prompt if prompt else "Hey, how's it going?"
    messages = (
        [{"role": "user", "content": prompt}]
        if prompt
        else [{"role": "user", "content": input_prompt}]
    )
    full_message_list = [
        messages for _ in range(test_calls)
    ]  # call it as many times as set by user to load test models
    start_time = time.time()
    try:
        results = testing_batch_completion(models=models, messages=full_message_list)
        end_time = time.time()
        response_time = end_time - start_time
        return {
            "total_response_time": response_time,
            "calls_made": test_calls,
            "prompt": input_prompt,
            "results": results,
        }
    except Exception as e:
        traceback.print_exc()
        end_time = time.time()
        response_time = end_time - start_time
        return {
            "total_response_time": response_time,
            "calls_made": test_calls,
            "prompt": input_prompt,
            "exception": e,
        }
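An illustrative call pattern for the helpers above; the model names and prompts are placeholders, and the import line matches the one used in test_load_test_model.py later in this diff.

from litellm import load_test_model, testing_batch_completion

# load test a single model with a handful of calls
report = load_test_model(models=["gpt-3.5-turbo"], num_calls=5)
print(report["total_response_time"], report["calls_made"])

# compare several models on the same prompts
results = testing_batch_completion(
    models=["gpt-3.5-turbo", "claude-instant-1"],
    messages=[[{"role": "user", "content": "What is your name?"}]],
)
for result in results:
    print(result["status"], result["response_time"])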

View file

@@ -3,27 +3,37 @@
import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion

litellm.set_verbose = False


def logger_fn(model_call_object: dict):
    print(f"model call details: {model_call_object}")


user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

## Test 1: Setting key dynamically
temp_key = os.environ.get("ANTHROPIC_API_KEY")
os.environ["ANTHROPIC_API_KEY"] = "bad-key"
# test on openai completion call
try:
    response = completion(
        model="claude-instant-1",
        messages=messages,
        logger_fn=logger_fn,
        api_key=temp_key,
    )
    print(f"response: {response}")
except:
    print(f"error occurred: {traceback.format_exc()}")
    pass
os.environ["ANTHROPIC_API_KEY"] = temp_key
@@ -31,11 +41,13 @@ os.environ["ANTHROPIC_API_KEY"] = temp_key
## Test 2: Setting key via __init__ params
litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
os.environ.pop("ANTHROPIC_API_KEY")
# test on openai completion call
try:
    response = completion(
        model="claude-instant-1", messages=messages, logger_fn=logger_fn
    )
    print(f"response: {response}")
except:
    print(f"error occurred: {traceback.format_exc()}")
    pass
os.environ["ANTHROPIC_API_KEY"] = temp_key

View file

@@ -5,17 +5,22 @@ import sys, os
import pytest
import traceback
import asyncio

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import acompletion


async def test_get_response():
    user_message = "Hello, how are you?"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = await acompletion(model="gpt-3.5-turbo", messages=messages)
    except Exception as e:
        pytest.fail(f"error occurred: {e}")
    return response


response = asyncio.run(test_get_response())
print(response)

View file

@@ -1,16 +1,17 @@
#### What this tests ####
# This tests chaos monkeys - if random parts of the system are broken / things aren't sent correctly - what happens.
# Expect to add more edge cases to this over time.
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
# Get the current directory of the script
current_dir = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory by joining the current directory with '..'
parent_dir = os.path.join(current_dir, "../..")

# Add the parent directory to the system path
sys.path.append(parent_dir)
@@ -26,7 +27,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
model_val = None
@@ -35,18 +36,18 @@ def test_completion_with_empty_model():
    try:
        response = completion(model=model_val, messages=messages)
    except Exception as e:
        print(f"error occurred: {e}")
        pass


# bad key
temp_key = os.environ.get("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = "bad-key"
# test on openai completion call
try:
    response = completion(model="gpt-3.5-turbo", messages=messages)
    print(f"response: {response}")
except:
    print(f"error occurred: {traceback.format_exc()}")
    pass
os.environ["OPENAI_API_KEY"] = temp_key

View file

@@ -3,7 +3,10 @@
import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import batch_completion
@@ -14,4 +17,4 @@ model = "gpt-3.5-turbo"
result = batch_completion(model=model, messages=messages)
print(result)
print(len(result))

View file

@@ -19,7 +19,7 @@
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@@ -1,9 +1,13 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion
@@ -12,7 +16,6 @@ litellm.caching = True
messages = [{"role": "user", "content": "who is ishaan Github? "}]

# test if response cached
def test_caching():
    try:
@@ -27,9 +30,5 @@ def test_caching():
        pytest.fail(f"Error occurred: {e}")
    except Exception as e:
        litellm.caching = False
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")

View file

@@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@@ -14,17 +16,22 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
litellm.set_verbose = True


def logger_fn(model_call_object: dict):
    # print(f"model call details: {model_call_object}")
    pass


user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]


def test_completion_openai():
    try:
        print("running query")
        response = completion(
            model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn
        )
        print(f"response: {response}")
        # Add any assertions here to check the response
    except Exception as e:
@@ -34,33 +41,46 @@ def test_completion_openai():
def test_completion_claude():
    try:
        response = completion(
            model="claude-instant-1", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_non_openai():
    try:
        response = completion(
            model="command-nightly", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_embedding_openai():
    try:
        response = embedding(
            model="text-embedding-ada-002", input=[user_message], logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(f"response: {str(response)[:50]}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_bad_azure_embedding():
    try:
        response = embedding(
            model="chatgpt-test", input=[user_message], logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(f"response: {str(response)[:50]}")
    except Exception as e:
        pass


# def test_good_azure_embedding():
#     try:
#         response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
@@ -68,4 +88,3 @@ def test_bad_azure_embedding():
#         print(f"response: {str(response)[:50]}")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

View file

@@ -1,44 +1,58 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion

# from infisical import InfisicalClient

# litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])

user_message = "Hello, whats the weather in San Francisco??"
messages = [{"content": user_message, "role": "user"}]


def logger_fn(user_model_dict):
    print(f"user_model_dict: {user_model_dict}")


def test_completion_claude():
    try:
        response = completion(
            model="claude-instant-1", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_claude_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="claude-2", messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"])  # same as openai format
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# def test_completion_hf_api():
#     try:
#         user_message = "write some code to find the sum of two numbers"
@@ -62,10 +76,12 @@ def test_completion_claude_stream():
def test_completion_cohere():
    try:
        response = completion(
            model="command-nightly", messages=messages, max_tokens=100
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        print(f"str response{response_str}")
        response_str_2 = response.choices[0].message.content
        if type(response_str) != str:
@@ -75,24 +91,31 @@ def test_completion_cohere():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_cohere_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(
            model="command-nightly", messages=messages, stream=True, max_tokens=50
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"])  # same as openai format
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai():
    try:
        response = completion(model="gpt-3.5-turbo", messages=messages)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        assert response_str == response_str_2
        assert type(response_str) == str
@@ -100,6 +123,7 @@ def test_completion_openai():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_text_openai():
    try:
        response = completion(model="text-davinci-003", messages=messages)
@@ -108,17 +132,31 @@ def test_completion_text_openai():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openrouter():
    try:
        response = completion(
            model="google/palm-2-chat-bison",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
@@ -127,12 +165,23 @@ def test_completion_openrouter():
def test_completion_openai_with_more_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            frequency_penalty=-0.5,
            logit_bias={123: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print(response["choices"][0]["message"]["content"])
        print(response.choices[0].message.content)
        if type(response_str) != str:
            pytest.fail(f"Error occurred: {e}")
@@ -141,14 +190,28 @@ def test_completion_openai_with_more_optional_params():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_stream():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            stream=True,
            frequency_penalty=-0.5,
            logit_bias={27000: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_functions():
    function1 = [
        {
@@ -159,33 +222,39 @@ def test_completion_openai_with_functions():
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_azure():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            deployment_id="chatgpt-test",
            messages=messages,
            custom_llm_provider="azure",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
def test_completion_replicate_llama_stream():
    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
    try:
@@ -197,23 +266,32 @@ def test_completion_replicate_llama_stream():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_replicate_stability_stream():
    model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            stream=True,
            custom_llm_provider="replicate",
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"])
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_replicate_stability():
    model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
    try:
        response = completion(
            model=model_name, messages=messages, custom_llm_provider="replicate"
        )
        # Add any assertions here to check the response
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print(response_str)
        print(response_str_2)
@@ -224,6 +302,7 @@ def test_completion_replicate_stability():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


######## Test TogetherAI ########
def test_completion_together_ai():
    model_name = "togethercomputer/llama-2-70b-chat"
@@ -234,15 +313,22 @@ def test_completion_together_ai():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_petals():
    model_name = "stabilityai/StableBeluga2"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            custom_llm_provider="petals",
            force_timeout=120,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# def test_baseten_falcon_7bcompletion():
#     model_name = "qvv0xeq"
#     try:
@@ -290,7 +376,6 @@ def test_petals():
#         pytest.fail(f"Error occurred: {e}")

#### Test A121 ###################
# def test_completion_ai21():
#     model_name = "j2-light"
@@ -301,7 +386,7 @@ def test_petals():
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test config file with completion #
# def test_completion_openai_config():
#     try:
#         litellm.config_path = "../config.json"
@@ -333,4 +418,3 @@ def test_petals():
#         return

# test_completion_together_ai_stream()

View file

@@ -1,20 +1,33 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import completion


def logging_fn(model_call_dict):
    print(f"model call details: {model_call_dict}")


models = ["gorilla-7b-hf-v1", "gpt-4"]
custom_llm_provider = None
messages = [{"role": "user", "content": "Hey, how's it going?"}]

for model in models:  # iterate through list
    custom_api_base = None
    if model == "gorilla-7b-hf-v1":
        custom_llm_provider = "custom_openai"
        custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
    completion(
        model=model,
        messages=messages,
        custom_llm_provider=custom_llm_provider,
        custom_api_base=custom_api_base,
        logger_fn=logging_fn,
    )

View file

@@ -1,9 +1,10 @@
import sys, os
import traceback
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient
@@ -11,10 +12,13 @@ from infisical import InfisicalClient
# # litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])


def test_openai_embedding():
    try:
        response = embedding(
            model="text-embedding-ada-002", input=["good morning from litellm"]
        )
        # Add any assertions here to check the response
        print(f"response: {str(response)}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

View file

@@ -1,10 +1,21 @@
# from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
import os
import sys
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import (
    embedding,
    completion,
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)
from concurrent.futures import ThreadPoolExecutor
import pytest
@@ -23,8 +34,10 @@ litellm.failure_callback = ["sentry"]
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
test_model = "claude-instant-1"
models = ["claude-instant-1"]


def logging_fn(model_call_dict):
    if "model" in model_call_dict:
        print(f"model_call_dict: {model_call_dict['model']}")
    else:
        print(f"model_call_dict: {model_call_dict}")
@@ -38,7 +51,12 @@ def test_context_window(model):
    try:
        model = "chatgpt-test"
        print(f"model: {model}")
        response = completion(
            model=model,
            messages=messages,
            custom_llm_provider="azure",
            logger_fn=logging_fn,
        )
        print(f"response: {response}")
    except InvalidRequestError as e:
        print(f"InvalidRequestError: {e.llm_provider}")
@@ -52,14 +70,17 @@ def test_context_window(model):
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    return


test_context_window(test_model)


# Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
def invalid_auth(model):  # set the model key to an invalid key, depending on the model
    messages = [{"content": "Hello, how are you?", "role": "user"}]
    temporary_key = None
    try:
        custom_llm_provider = None
        if model == "gpt-3.5-turbo":
            temporary_key = os.environ["OPENAI_API_KEY"]
@@ -74,22 +95,29 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
        elif model == "command-nightly":
            temporary_key = os.environ["COHERE_API_KEY"]
            os.environ["COHERE_API_KEY"] = "bad-key"
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            temporary_key = os.environ["REPLICATE_API_KEY"]
            os.environ["REPLICATE_API_KEY"] = "bad-key"
        print(f"model: {model}")
        response = completion(
            model=model, messages=messages, custom_llm_provider=custom_llm_provider
        )
        print(f"response: {response}")
    except AuthenticationError as e:
        print(f"AuthenticationError Caught Exception - {e.llm_provider}")
    except (
        OpenAIError
    ):  # is at least an openai error -> in case of random model errors - e.g. overloaded server
        print(f"OpenAIError Caught Exception - {e}")
    except Exception as e:
        print(type(e))
        print(e.__class__.__name__)
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    if temporary_key != None:  # reset the key
        if model == "gpt-3.5-turbo":
            os.environ["OPENAI_API_KEY"] = temporary_key
        elif model == "chatgpt-test":
@@ -99,13 +127,18 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
            os.environ["ANTHROPIC_API_KEY"] = temporary_key
        elif model == "command-nightly":
            os.environ["COHERE_API_KEY"] = temporary_key
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            os.environ["REPLICATE_API_KEY"] = temporary_key
    return


invalid_auth(test_model)
# # Test 3: Rate Limit Errors
# def test_model(model):
#     try:
#         sample_text = "how does a court case get to the Supreme Court?" * 50000
#         messages = [{ "content": sample_text,"role": "user"}]
#         custom_llm_provider = None
@@ -142,5 +175,3 @@ invalid_auth(test_model)
#         accuracy_score = counts[True]/(counts[True] + counts[False])
#         print(f"accuracy_score: {accuracy_score}")
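A condensed sketch of the error-handling pattern these tests exercise; the exception classes come from the litellm import block at the top of this file, while the model name is only a placeholder.

from litellm import completion, AuthenticationError, RateLimitError, OpenAIError

try:
    completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi"}])
except AuthenticationError as e:
    print(f"bad or missing key for provider: {e.llm_provider}")
except RateLimitError:
    print("rate limited - back off and retry")
except OpenAIError as e:
    print(f"other provider error: {e}")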

View file

@@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@@ -14,11 +16,15 @@ litellm.success_callback = ["helicone"]
litellm.set_verbose = True

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]


# openai call
response = completion(
    model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
)

# cohere call
response = completion(
    model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]
)

View file

@@ -1,22 +1,37 @@
import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import load_test_model, testing_batch_completion

# ## Load Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5)
# print(result)
# print(len(result["results"]))

# ## Duration Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 2 minutes, sending 5 calls every 15s
# print(result)

## Quality Test across Model
models = [
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-4",
    "claude-instant-1",
    {
        "model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
        "custom_llm_provider": "replicate",
    },
]
messages = [
    [{"role": "user", "content": "What is your name?"}],
    [{"role": "user", "content": "Hey, how's it going?"}],
]
result = testing_batch_completion(models=models, messages=messages)
print(result)

View file

@@ -3,7 +3,10 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
@@ -11,49 +14,53 @@ litellm.set_verbose = False
 score = 0
 def logger_fn(model_call_object: dict):
     print(f"model call details: {model_call_object}")
 user_message = "Hello, how are you?"
-messages = [{ "content": user_message,"role": "user"}]
+messages = [{"content": user_message, "role": "user"}]
 # test on openai completion call
 try:
     response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
-    score +=1
+    score += 1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 # test on non-openai completion call
 try:
-    response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+    response = completion(
+        model="claude-instant-1", messages=messages, logger_fn=logger_fn
+    )
     print(f"claude response: {response}")
-    score +=1
+    score += 1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 # # test on openai embedding call
 # try:
 # response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
 # score +=1
 # except:
 # traceback.print_exc()
 # # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
 # try:
 # response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
 # except:
 # score +=1 # expect this to fail
 # traceback.print_exc()
 # # test on good azure openai embedding call
 # try:
 # response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
 # score +=1
 # except:
 # traceback.print_exc()
 # print(f"Score: {score}, Overall score: {score/5}")

@@ -3,7 +3,10 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
@@ -15,11 +18,11 @@ litellm.set_verbose = True
 model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 user_message = "Hello, how are you?"
-messages = [{ "content": user_message,"role": "user"}]
+messages = [{"content": user_message, "role": "user"}]
 for model in model_fallback_list:
     try:
         response = embedding(model="text-embedding-ada-002", input=[user_message])
         response = completion(model=model, messages=messages)
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")

@@ -20,4 +20,4 @@
 # if __name__ == '__main__':
 # from waitress import serve
 # serve(app, host='localhost', port=8080, threads=10)

@@ -1,4 +1,4 @@
 # import requests, json
 # BASE_URL = 'http://localhost:8080'
@@ -11,4 +11,4 @@
 # print("Hello route test passed!")
 # if __name__ == '__main__':
 # test_hello_route()

@@ -4,7 +4,10 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
@@ -13,11 +16,11 @@ litellm.set_verbose = True
 model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 user_message = "Hello, how are you?"
-messages = [{ "content": user_message,"role": "user"}]
+messages = [{"content": user_message, "role": "user"}]
 for model in model_fallback_list:
     try:
         response = embedding(model="text-embedding-ada-002", input=[user_message])
         response = completion(model=model, messages=messages)
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")

@@ -53,7 +53,6 @@
 # # # return this generator to the client for streaming requests
 # # async def get_response():
 # # global generator
 # # async for elem in generator:

@@ -12,7 +12,6 @@
 # import asyncio
 # user_message = "respond in 20 words. who are you?"
 # messages = [{ "content": user_message,"role": "user"}]
@@ -45,8 +44,3 @@
 # pytest.fail(f"Error occurred: {e}")
 # test_completion_ollama_stream()

@@ -4,7 +4,10 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
 from infisical import InfisicalClient
@@ -15,7 +18,7 @@ infisical_token = os.environ["INFISICAL_TOKEN"]
 litellm.secret_manager_client = InfisicalClient(token=infisical_token)
 user_message = "Hello, whats the weather in San Francisco??"
-messages = [{ "content": user_message,"role": "user"}]
+messages = [{"content": user_message, "role": "user"}]
 def test_completion_openai():
@@ -28,5 +31,5 @@ def test_completion_openai():
         pytest.fail(f"Error occurred: {e}")
     litellm.secret_manager_client = None
-test_completion_openai()
+test_completion_openai()
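
Pulled out of the test, the secret-manager flow above amounts to: attach an InfisicalClient so keys can be resolved from Infisical, make the call, then detach it. A sketch assembled from the lines shown in this hunk (the model name is assumed for illustration):

import os
import litellm
from infisical import InfisicalClient
from litellm import completion

# resolve provider API keys through Infisical instead of the usual lookup
litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])

response = completion(
    model="gpt-3.5-turbo",  # assumed model for this sketch
    messages=[{"content": "Hello, whats the weather in San Francisco??", "role": "user"}],
)

# detach the secret manager so later calls fall back to the default key lookup
litellm.secret_manager_client = None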

@@ -3,7 +3,10 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import completion
@@ -11,29 +14,40 @@ litellm.set_verbose = False
 score = 0
 def logger_fn(model_call_object: dict):
     print(f"model call details: {model_call_object}")
 user_message = "Hello, how are you?"
-messages = [{ "content": user_message,"role": "user"}]
+messages = [{"content": user_message, "role": "user"}]
 # test on anthropic completion call
 try:
-    response = completion(model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn)
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
     for chunk in response:
-        print(chunk['choices'][0]['delta'])
-    score +=1
+        print(chunk["choices"][0]["delta"])
+    score += 1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 # test on anthropic completion call
 try:
-    response = completion(model="meta-llama/Llama-2-7b-chat-hf", messages=messages, custom_llm_provider="huggingface", custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud", stream=True, logger_fn=logger_fn)
+    response = completion(
+        model="meta-llama/Llama-2-7b-chat-hf",
+        messages=messages,
+        custom_llm_provider="huggingface",
+        custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud",
+        stream=True,
+        logger_fn=logger_fn,
+    )
     for chunk in response:
-        print(chunk['choices'][0]['delta'])
-    score +=1
+        print(chunk["choices"][0]["delta"])
+    score += 1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
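
Both streaming tests iterate the response and print chunk["choices"][0]["delta"]. A sketch of stitching streamed deltas back into one string; the assumption that each delta is a dict with an optional "content" key follows the OpenAI-style chunk format and is not shown in the hunk above:

from litellm import completion

response = completion(
    model="claude-instant-1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    stream=True,
)

full_text = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    # "content" key assumed per OpenAI-style streaming chunks
    if isinstance(delta, dict):
        full_text += delta.get("content", "") or ""
    else:
        full_text += str(delta)
print(full_text)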

@@ -21,7 +21,7 @@
 # #openai call
 # response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 # #bad request call
 # response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

@@ -3,10 +3,14 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import time
 from litellm import timeout
 @timeout(10)
 def stop_after_10_s(force_timeout=60):
     print("Stopping after 10 seconds")
@@ -14,14 +18,14 @@ def stop_after_10_s(force_timeout=60):
     return
 start_time = time.time()
 try:
     stop_after_10_s(force_timeout=1)
 except Exception as e:
     print(e)
     pass
 end_time = time.time()
 print(f"total time: {end_time-start_time}")

@@ -49,4 +49,4 @@
 # # chat = chat_model.start_chat()
 # # response = chat.send_message("who are u? write a sentence", **parameters)
 # # print(f"Response from Model: {response.text}")

@@ -11,9 +11,7 @@ from threading import Thread
 from openai.error import Timeout
-def timeout(
-    timeout_duration: float = None, exception_to_raise = Timeout
-):
+def timeout(timeout_duration: float = None, exception_to_raise=Timeout):
     """
     Wraps a function to raise the specified exception if execution time
     is greater than the specified timeout.
@@ -44,7 +42,9 @@ def timeout(
                 result = future.result(timeout=local_timeout_duration)
             except futures.TimeoutError:
                 thread.stop_loop()
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
+                raise exception_to_raise(
+                    f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
+                )
             thread.stop_loop()
             return result
@@ -59,7 +59,9 @@ def timeout(
                 )
                 return value
             except asyncio.TimeoutError:
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
+                raise exception_to_raise(
+                    f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
+                )
         if iscoroutinefunction(func):
             return async_wrapper
@@ -80,4 +82,4 @@ class _LoopWrapper(Thread):
     def stop_loop(self):
         for task in asyncio.all_tasks(self.loop):
             task.cancel()
         self.loop.call_soon_threadsafe(self.loop.stop)
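
Under the hood the decorator bounds the wait on a worker's result and raises the configured exception (openai's Timeout by default) when the limit is exceeded; for coroutines it relies on asyncio.wait_for instead. The core synchronous mechanism, reduced to a standalone sketch that is not the decorator's exact code:

import time
from concurrent import futures

def run_with_timeout(func, timeout_duration, *args, **kwargs):
    executor = futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(func, *args, **kwargs)
    try:
        # bound the wait; the worker itself keeps running in the background
        return future.result(timeout=timeout_duration)
    except futures.TimeoutError:
        raise RuntimeError(
            f"A timeout error occurred. The function call took longer than {timeout_duration} second(s)."
        )
    finally:
        executor.shutdown(wait=False)

def slow():
    time.sleep(5)
    return "done"

try:
    run_with_timeout(slow, timeout_duration=1)
except RuntimeError as e:
    print(e)  # surfaces after ~1 second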

File diff suppressed because it is too large