add linting

ishaan-jaff 2023-08-18 11:05:05 -07:00
parent 8ef47524bf
commit 15b1da9dc8
40 changed files with 3110 additions and 1709 deletions
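The diffs below are whitespace- and layout-only changes; the new style matches what an auto-formatter such as black produces (the tool itself is not named in this excerpt, so treating it as black is an assumption). A minimal before/after sketch of the pattern applied throughout:

# before: long literals kept on one line
model_cost = {"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}}

# after: the formatter splits long collections one key per line, with trailing commas
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
}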


@@ -1,9 +1,10 @@
import threading

success_callback = []
failure_callback = []
set_verbose = False
telemetry = True
max_tokens = 256  # OpenAI Defaults
retry = True
api_key = None
openai_key = None
@@ -19,33 +20,99 @@ caching = False
hugging_api_token = None
togetherai_api_key = None
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    # Store user in thread local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]


####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
@@ -56,60 +123,48 @@ config_path = None
secret_manager_client = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
    "gpt-4",
    "gpt-4-0613",
    "gpt-4-32k",
    "gpt-4-32k-0613",
    #################
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = ["text-davinci-003"]
cohere_models = [
    "command-nightly",
    "command",
    "command-light",
    "command-medium-beta",
    "command-xlarge-beta",
]
anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
replicate_models = [
    "replicate/"
]  # placeholder, to make sure we accept any replicate model in our model_list
openrouter_models = [
    "google/palm-2-codechat-bison",
    "google/palm-2-chat-bison",
    "openai/gpt-3.5-turbo",
    "openai/gpt-3.5-turbo-16k",
    "openai/gpt-4-32k",
    "anthropic/claude-2",
    "anthropic/claude-instant-v1",
    "meta-llama/llama-2-13b-chat",
    "meta-llama/llama-2-70b-chat",
]
vertex_chat_models = ["chat-bison", "chat-bison@001"]
vertex_text_models = ["text-bison", "text-bison@001"]
huggingface_models = [
    "meta-llama/Llama-2-7b-hf",
@@ -124,25 +179,56 @@ huggingface_models = [
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
ai21_models = ["j2-ultra", "j2-mid", "j2-light"]

model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
)

provider_list = [
    "openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "ai21",
]

####### EMBEDDING MODELS ###################
open_ai_embedding_models = ["text-embedding-ada-002"]

from .timeout import timeout
from .testing import *
from .utils import (
    client,
    logging,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
)
from .main import *  # Import all the symbols from main.py
from .integrations import *
from openai.error import (
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)
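For context, the model_cost table above is what the logging integrations further down use to price a request. A minimal sketch of that computation (estimate_cost is a hypothetical helper; the usage dict shape follows the OpenAI-style responses used throughout this commit):

def estimate_cost(model: str, usage: dict) -> float:
    # Look up per-token prices in model_cost and multiply by the token counts.
    entry = model_cost[model]
    prompt_cost = entry["input_cost_per_token"] * usage["prompt_tokens"]
    completion_cost = entry["output_cost_per_token"] * usage["completion_tokens"]
    return prompt_cost + completion_cost

# estimate_cost("gpt-3.5-turbo", {"prompt_tokens": 100, "completion_tokens": 50})
# -> 100 * 0.0000015 + 50 * 0.000002 = 0.00025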


@@ -1,12 +1,21 @@
## LiteLLM versions of the OpenAI Exception Types
from openai.error import (
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)


class AuthenticationError(AuthenticationError):
    def __init__(self, message, llm_provider):
        self.status_code = 401
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class InvalidRequestError(InvalidRequestError):
@@ -15,7 +24,9 @@ class InvalidRequestError(InvalidRequestError):
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, f"{self.model}"
        )  # Call the base class constructor with the parameters it needs


class RateLimitError(RateLimitError):
@@ -23,21 +34,29 @@ class RateLimitError(RateLimitError):
        self.status_code = 429
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class ServiceUnavailableError(ServiceUnavailableError):
    def __init__(self, message, llm_provider):
        self.status_code = 500
        self.message = message
        self.llm_provider = llm_provider
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class OpenAIError(OpenAIError):
    def __init__(self, original_exception):
        self.status_code = original_exception.http_status
        super().__init__(
            http_body=original_exception.http_body,
            http_status=original_exception.http_status,
            json_body=original_exception.json_body,
            headers=original_exception.headers,
            code=original_exception.code,
        )
        self.llm_provider = "openai"
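A short usage sketch of the wrapper exceptions above (the values passed in are illustrative, not taken from this diff):

try:
    raise RateLimitError(message="rate limited", llm_provider="openai")
except RateLimitError as e:
    # status_code and llm_provider are set by the wrapper before delegating to the OpenAI base class
    print(e.status_code, e.llm_provider, e.message)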


@@ -2,28 +2,90 @@
# On success + failure, log events to aispend.io
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


class AISpendLogger:
    # Class variables or attributes
    def __init__(self):
@@ -37,12 +99,18 @@ class AISpendLogger:
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
@@ -55,37 +123,52 @@ class AISpendLogger:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            print_verbose(
                f"AISpend Logging - Enters logging function for model {model}"
            )
            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }
            response_timestamp = datetime.datetime.fromtimestamp(
                int(response_obj["created"])
            ).strftime("%Y-%m-%d")
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [
                {
                    "requests": 1,
                    "requests_context": 1,
                    "context_tokens": response_obj["usage"]["prompt_tokens"],
                    "requests_generated": 1,
                    "generated_tokens": response_obj["usage"]["completion_tokens"],
                    "recorded_date": response_timestamp,
                    "model_id": response_obj["model"],
                    "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
                    "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
                }
            ]
            print_verbose(f"AISpend Logging - final data object: {data}")
        except:


@@ -2,28 +2,90 @@
# On success + failure, log events to aispend.io
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


class BerriSpendLogger:
    # Class variables or attributes
    def __init__(self):
@@ -36,12 +98,18 @@ class BerriSpendLogger:
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
@@ -54,42 +122,59 @@ class BerriSpendLogger:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        # Method definition
        try:
            print_verbose(
                f"BerriSpend Logging - Enters logging function for model {model}"
            )
            url = f"https://berrispend.berri.ai/spend"
            headers = {"Content-Type": "application/json"}
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )
            response_time = (end_time - start_time).total_seconds()
            if "response" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "response": response_obj["choices"][0]["message"]["content"],
                        "account_id": self.account_id,
                    }
                ]
            elif "error" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "error": response_obj["error"],
                        "account_id": self.account_id,
                    }
                ]
            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)


@@ -2,18 +2,23 @@
# On success, logs events to Helicone
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback


class HeliconeLogger:
    # Class variables or attributes
    helicone_model_list = ["gpt", "claude"]

    def __init__(self):
        # Instance variables
        self.provider_url = "https://api.openai.com/v1"
        self.key = os.getenv("HELICONE_API_KEY")

    def claude_mapping(self, model, messages, response_obj):
        from anthropic import HUMAN_PROMPT, AI_PROMPT

        prompt = f"{HUMAN_PROMPT}"
        for message in messages:
            if "role" in message:
@@ -26,46 +31,82 @@ class HeliconeLogger:
        prompt += f"{AI_PROMPT}"
        claude_provider_request = {"model": model, "prompt": prompt}
        claude_response_obj = {
            "completion": response_obj["choices"][0]["message"]["content"],
            "model": model,
            "stop_reason": "stop_sequence",
        }
        return claude_provider_request, claude_response_obj

    def log_success(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        # Method definition
        try:
            print_verbose(
                f"Helicone Logging - Enters logging function for model {model}"
            )
            model = (
                model
                if any(
                    accepted_model in model
                    for accepted_model in self.helicone_model_list
                )
                else "gpt-3.5-turbo"
            )
            provider_request = {"model": model, "messages": messages}
            if "claude" in model:
                provider_request, response_obj = self.claude_mapping(
                    model=model, messages=messages, response_obj=response_obj
                )
            providerResponse = {
                "json": response_obj,
                "headers": {"openai-version": "2020-10-01"},
                "status": 200,
            }
            # Code to be executed
            url = "https://api.hconeai.com/oai/v1/log"
            headers = {
                "Authorization": f"Bearer {self.key}",
                "Content-Type": "application/json",
            }
            start_time_seconds = int(start_time.timestamp())
            start_time_milliseconds = int(
                (start_time.timestamp() - start_time_seconds) * 1000
            )
            end_time_seconds = int(end_time.timestamp())
            end_time_milliseconds = int(
                (end_time.timestamp() - end_time_seconds) * 1000
            )
            data = {
                "providerRequest": {
                    "url": self.provider_url,
                    "json": provider_request,
                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
                },
                "providerResponse": providerResponse,
                "timing": {
                    "startTime": {
                        "seconds": start_time_seconds,
                        "milliseconds": start_time_milliseconds,
                    },
                    "endTime": {
                        "seconds": end_time_seconds,
                        "milliseconds": end_time_milliseconds,
                    },
                },  # {"seconds": .., "milliseconds": ..}
            }
            response = requests.post(url, headers=headers, json=data)
            if response.status_code == 200:
                print_verbose("Helicone Logging - Success!")
            else:
                print_verbose(
                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
                )
                print_verbose(f"Helicone Logging - Error {response.text}")
        except:
            # traceback.print_exc()
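The timing payload above splits each timestamp into whole seconds plus a millisecond remainder; a standalone sketch of that arithmetic (the datetime value is illustrative):

import datetime

start_time = datetime.datetime.now()  # illustrative value
start_time_seconds = int(start_time.timestamp())
start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
# e.g. a timestamp of 1692380705.250 becomes seconds=1692380705, milliseconds=250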


@@ -3,31 +3,94 @@
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys

model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


class Supabase:
    # Class variables or attributes
    supabase_table_name = "request_logs"

    def __init__(self):
        # Instance variables
        self.supabase_url = os.getenv("SUPABASE_URL")
@@ -35,9 +98,11 @@ class Supabase:
        try:
            import supabase
        except ImportError:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
            import supabase
        self.supabase_client = supabase.create_client(
            self.supabase_url, self.supabase_key
        )

    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
@@ -45,12 +110,18 @@ class Supabase:
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
@@ -63,40 +134,74 @@ class Supabase:
                output_cost_sum += model_cost[model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(
        self,
        model,
        messages,
        end_user,
        response_obj,
        start_time,
        end_time,
        print_verbose,
    ):
        try:
            print_verbose(
                f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
            )
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )
            response_time = (end_time - start_time).total_seconds()
            if "choices" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj["choices"][0]["message"]["content"],
                    "end_user": end_user,
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
                )
                data, count = (
                    self.supabase_client.table(self.supabase_table_name)
                    .insert(supabase_data_obj)
                    .execute()
                )
            elif "error" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj["error"],
                    "end_user": end_user,
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
                )
                data, count = (
                    self.supabase_client.table(self.supabase_table_name)
                    .insert(supabase_data_obj)
                    .execute()
                )
        except:
            # traceback.print_exc()
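A minimal sketch of the insert pattern the Supabase logger uses above, mirroring the call chain in the diff (the URL and key are placeholders, and the row is illustrative):

import supabase

client = supabase.create_client("https://example.supabase.co", "service-key")  # placeholder credentials
data, count = (
    client.table("request_logs")
    .insert({"model": "gpt-3.5-turbo", "total_cost": 0.00025})
    .execute()
)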


@@ -6,18 +6,22 @@ import time
from typing import Callable
from litellm.utils import ModelResponse


class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman:"
    AI_PROMPT = "\n\nAssistant:"


class AnthropicError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AnthropicLLM:
    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
@@ -25,31 +29,50 @@ class AnthropicLLM:
        self.api_key = api_key
        self.validate_environment(api_key=api_key)

    def validate_environment(
        self, api_key
    ):  # set up the environment required to run the model
        # set the api key
        if self.api_key == None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        self.api_key = api_key
        self.headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": self.api_key,
        }

    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        model = model
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
                    prompt += (
                        f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
                    )
                else:
                    prompt += (
                        f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
                    )
            else:
                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
        if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
            "inf"
        ):
            max_tokens = optional_params["max_tokens"]
        else:
            max_tokens = self.default_max_tokens_to_sample
@@ -57,39 +80,66 @@ class AnthropicLLM:
            "model": model,
            "prompt": prompt,
            "max_tokens_to_sample": max_tokens,
            **optional_params,
        }

        ## LOGGING
        logging(
            model=model,
            input=prompt,
            additional_args={
                "litellm_params": litellm_params,
                "optional_params": optional_params,
            },
            logger_fn=logger_fn,
        )
        ## COMPLETION CALL
        response = requests.post(
            self.completion_url, headers=self.headers, data=json.dumps(data)
        )
        if "stream" in optional_params and optional_params["stream"] == True:
            return response.iter_lines()
        else:
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                additional_args={
                    "litellm_params": litellm_params,
                    "optional_params": optional_params,
                    "original_response": response.text,
                },
                logger_fn=logger_fn,
            )
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
            if "error" in completion_response:
                raise AnthropicError(
                    message=completion_response["error"],
                    status_code=response.status_code,
                )
            else:
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response["completion"]
            ## CALCULATING USAGE
            prompt_tokens = len(
                self.encoding.encode(prompt)
            )  ##[TODO] use the anthropic tokenizer here
            completion_tokens = len(
                self.encoding.encode(model_response["choices"][0]["message"]["content"])
            )  ##[TODO] use the anthropic tokenizer here
            model_response["created"] = time.time()
            model_response["model"] = model
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            return model_response

    def embedding():  # logic for parsing in - calling - parsing out model embedding calls
        pass
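To make the prompt-construction loop in completion() above concrete, a small illustration of the string it builds from a chat-style messages list (the conversation content is an example, not taken from this diff):

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What is LiteLLM?"},
]
# Following the loop above, the resulting prompt string is:
# "\n\nHuman:" + "\n\nHuman:Hi" + "\n\nAssistant:Hello!" + "\n\nHuman:What is LiteLLM?" + "\n\nAssistant:"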


@@ -1,11 +1,12 @@
## This is a template base class to be used for adding new LLM providers via API calls


class BaseLLM:
    def validate_environment():  # set up the environment required to run the model
        pass

    def completion():  # logic for parsing in - calling - parsing out model completion calls
        pass

    def embedding():  # logic for parsing in - calling - parsing out model embedding calls
        pass


@ -7,18 +7,24 @@ import time
from typing import Callable from typing import Callable
from litellm.utils import ModelResponse from litellm.utils import ModelResponse
class HuggingfaceError(Exception): class HuggingfaceError(Exception):
def __init__(self, status_code, message): def __init__(self, status_code, message):
self.status_code = status_code self.status_code = status_code
self.message = message self.message = message
super().__init__(self.message) # Call the base class constructor with the parameters it needs super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class HuggingfaceRestAPILLM():
class HuggingfaceRestAPILLM:
def __init__(self, encoding, api_key=None) -> None: def __init__(self, encoding, api_key=None) -> None:
self.encoding = encoding self.encoding = encoding
self.validate_environment(api_key=api_key) self.validate_environment(api_key=api_key)
def validate_environment(self, api_key): # set up the environment required to run the model def validate_environment(
self, api_key
): # set up the environment required to run the model
self.headers = { self.headers = {
"content-type": "application/json", "content-type": "application/json",
} }
@ -27,7 +33,17 @@ class HuggingfaceRestAPILLM():
if self.api_key != None: if self.api_key != None:
self.headers["Authorization"] = f"Bearer {self.api_key}" self.headers["Authorization"] = f"Bearer {self.api_key}"
def completion(self, model: str, messages: list, custom_api_base: str, model_response: ModelResponse, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls def completion(
self,
model: str,
messages: list,
custom_api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
litellm_params=None,
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
if custom_api_base: if custom_api_base:
completion_url = custom_api_base completion_url = custom_api_base
elif "HF_API_BASE" in os.environ: elif "HF_API_BASE" in os.environ:
@ -35,7 +51,9 @@ class HuggingfaceRestAPILLM():
else: else:
completion_url = f"https://api-inference.huggingface.co/models/{model}" completion_url = f"https://api-inference.huggingface.co/models/{model}"
prompt = "" prompt = ""
if "meta-llama" in model and "chat" in model: # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2 if (
"meta-llama" in model and "chat" in model
): # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
prompt = "<s>" prompt = "<s>"
for message in messages: for message in messages:
if message["role"] == "system": if message["role"] == "system":
@ -57,14 +75,33 @@ class HuggingfaceRestAPILLM():
# "parameters": optional_params # "parameters": optional_params
} }
## LOGGING ## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn) logging(
model=model,
input=prompt,
additional_args={
"litellm_params": litellm_params,
"optional_params": optional_params,
},
logger_fn=logger_fn,
)
## COMPLETION CALL ## COMPLETION CALL
response = requests.post(completion_url, headers=self.headers, data=json.dumps(data)) response = requests.post(
completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True: if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines() return response.iter_lines()
else: else:
## LOGGING ## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn) logging(
model=model,
input=prompt,
additional_args={
"litellm_params": litellm_params,
"optional_params": optional_params,
"original_response": response.text,
},
logger_fn=logger_fn,
)
print_verbose(f"raw model_response: {response.text}") print_verbose(f"raw model_response: {response.text}")
## RESPONSE OBJECT ## RESPONSE OBJECT
completion_response = response.json() completion_response = response.json()
@ -72,24 +109,32 @@ class HuggingfaceRestAPILLM():
if isinstance(completion_response, dict) and "error" in completion_response: if isinstance(completion_response, dict) and "error" in completion_response:
print_verbose(f"completion error: {completion_response['error']}") print_verbose(f"completion error: {completion_response['error']}")
print_verbose(f"response.status_code: {response.status_code}") print_verbose(f"response.status_code: {response.status_code}")
raise HuggingfaceError(message=completion_response["error"], status_code=response.status_code) raise HuggingfaceError(
message=completion_response["error"],
status_code=response.status_code,
)
else: else:
model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"] model_response["choices"][0]["message"][
"content"
] = completion_response[0]["generated_text"]
## CALCULATING USAGE ## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the llama2 tokenizer here prompt_tokens = len(
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the llama2 tokenizer here self.encoding.encode(prompt)
) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the llama2 tokenizer here
model_response["created"] = time.time() model_response["created"] = time.time()
model_response["model"] = model model_response["model"] = model
model_response["usage"] = { model_response["usage"] = {
"prompt_tokens": prompt_tokens, "prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens, "completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens "total_tokens": prompt_tokens + completion_tokens,
} }
return model_response return model_response
pass pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass pass
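The hunk above reflows the Hugging Face Inference API wrapper. A minimal sketch of driving the class directly, assuming tiktoken is installed, the class lives at the hypothetical path litellm/llms/huggingface_restapi.py, and HF_TOKEN holds a valid Hugging Face API key (only the encoding/api_key constructor arguments and the completion signature come from the hunk itself):

# Editorial sketch, not part of the commit; import path and env var are assumptions.
import os
import tiktoken
from litellm.utils import ModelResponse
from litellm.llms.huggingface_restapi import HuggingfaceRestAPILLM  # hypothetical path

encoding = tiktoken.get_encoding("cl100k_base")
hf_llm = HuggingfaceRestAPILLM(encoding=encoding, api_key=os.environ.get("HF_TOKEN"))

model_response = ModelResponse()  # constructed with defaults; may need adjusting
result = hf_llm.completion(
    model="meta-llama/Llama-2-7b-chat-hf",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    custom_api_base=None,           # falls back to HF_API_BASE or the public inference URL
    model_response=model_response,
    print_verbose=print,
    optional_params={},             # {"stream": True} would return response.iter_lines() instead
    litellm_params={},
)
print(result["choices"][0]["message"]["content"])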

File diff suppressed because it is too large


@ -3,51 +3,80 @@ import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import traceback import traceback
def testing_batch_completion(*args, **kwargs):
try:
batch_models = args[0] if len(args) > 0 else kwargs.pop("models") ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
results = []
completions = []
exceptions = []
times = []
with ThreadPoolExecutor() as executor:
for model in batch_models:
kwargs_modified = dict(kwargs)
args_modified = list(args)
if len(args) > 0:
args_modified[0] = model["model"]
else:
kwargs_modified["model"] = model["model"] if isinstance(model, dict) and "model" in model else model # if model is a dictionary get it's value else assume it's a string
kwargs_modified["custom_llm_provider"] = model["custom_llm_provider"] if isinstance(model, dict) and "custom_llm_provider" in model else None
kwargs_modified["custom_api_base"] = model["custom_api_base"] if isinstance(model, dict) and "custom_api_base" in model else None
for message_list in batch_messages:
if len(args) > 1:
args_modified[1] = message_list
future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
else:
kwargs_modified["messages"] = message_list
future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
completions.append((future, message_list))
# Retrieve the results and calculate elapsed time for each completion call def testing_batch_completion(*args, **kwargs):
for completion in completions: try:
future, message_list = completion batch_models = (
start_time = time.time() args[0] if len(args) > 0 else kwargs.pop("models")
try: ) ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
result = future.result() batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
end_time = time.time() results = []
elapsed_time = end_time - start_time completions = []
result_dict = {"status": "succeeded", "response": future.result(), "prompt": message_list, "response_time": elapsed_time} exceptions = []
results.append(result_dict) times = []
except Exception as e: with ThreadPoolExecutor() as executor:
end_time = time.time() for model in batch_models:
elapsed_time = end_time - start_time kwargs_modified = dict(kwargs)
result_dict = {"status": "failed", "response": e, "response_time": elapsed_time} args_modified = list(args)
results.append(result_dict) if len(args) > 0:
return results args_modified[0] = model["model"]
except: else:
traceback.print_exc() kwargs_modified["model"] = (
model["model"]
if isinstance(model, dict) and "model" in model
else model
) # if model is a dictionary get it's value else assume it's a string
kwargs_modified["custom_llm_provider"] = (
model["custom_llm_provider"]
if isinstance(model, dict) and "custom_llm_provider" in model
else None
)
kwargs_modified["custom_api_base"] = (
model["custom_api_base"]
if isinstance(model, dict) and "custom_api_base" in model
else None
)
for message_list in batch_messages:
if len(args) > 1:
args_modified[1] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
else:
kwargs_modified["messages"] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
completions.append((future, message_list))
# Retrieve the results and calculate elapsed time for each completion call
for completion in completions:
future, message_list = completion
start_time = time.time()
try:
result = future.result()
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "succeeded",
"response": future.result(),
"prompt": message_list,
"response_time": elapsed_time,
}
results.append(result_dict)
except Exception as e:
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "failed",
"response": e,
"response_time": elapsed_time,
}
results.append(result_dict)
return results
except:
traceback.print_exc()
def duration_test_model(original_function): def duration_test_model(original_function):
def wrapper_function(*args, **kwargs): def wrapper_function(*args, **kwargs):
@ -70,22 +99,39 @@ def duration_test_model(original_function):
# Return the wrapper function # Return the wrapper function
return wrapper_function return wrapper_function
@duration_test_model @duration_test_model
def load_test_model(models: list, prompt: str = None, num_calls: int = None): def load_test_model(models: list, prompt: str = None, num_calls: int = None):
test_calls = 100 test_calls = 100
if num_calls: if num_calls:
test_calls = num_calls test_calls = num_calls
input_prompt = prompt if prompt else "Hey, how's it going?" input_prompt = prompt if prompt else "Hey, how's it going?"
messages = [{"role": "user", "content": prompt}] if prompt else [{"role": "user", "content": input_prompt}] messages = (
full_message_list = [messages for _ in range(test_calls)] # call it as many times as set by user to load test models [{"role": "user", "content": prompt}]
start_time = time.time() if prompt
try: else [{"role": "user", "content": input_prompt}]
results = testing_batch_completion(models=models, messages=full_message_list) )
end_time = time.time() full_message_list = [
response_time = end_time - start_time messages for _ in range(test_calls)
return {"total_response_time": response_time, "calls_made": test_calls, "prompt": input_prompt, "results": results} ] # call it as many times as set by user to load test models
except Exception as e: start_time = time.time()
traceback.print_exc() try:
end_time = time.time() results = testing_batch_completion(models=models, messages=full_message_list)
response_time = end_time - start_time end_time = time.time()
return {"total_response_time": response_time, "calls_made": test_calls, "prompt": input_prompt, "exception": e} response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"results": results,
}
except Exception as e:
traceback.print_exc()
end_time = time.time()
response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"exception": e,
}
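For reference, a minimal sketch of calling the two helpers reformatted above, based only on the signatures and return shapes shown in this hunk (model names are illustrative):

# Editorial sketch, not part of the commit.
from litellm import load_test_model, testing_batch_completion

# Fan a prompt out across several providers; per the comment in the function above,
# each models entry may be a bare model name or a dict carrying a custom_llm_provider.
results = testing_batch_completion(
    models=["gpt-3.5-turbo", "claude-instant-1"],
    messages=[[{"role": "user", "content": "Hey, how's it going?"}]],
)

# Load-test one or more models; the undecorated function builds a dict with
# total_response_time / calls_made / prompt / results (the duration_test_model
# wrapper around it is defined outside this hunk).
report = load_test_model(models=["gpt-3.5-turbo"], prompt="Hey, how's it going?", num_calls=5)
print(report)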


@ -3,24 +3,34 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
litellm.set_verbose = False litellm.set_verbose = False
def logger_fn(model_call_object: dict): def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}") print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
## Test 1: Setting key dynamically ## Test 1: Setting key dynamically
temp_key = os.environ.get("ANTHROPIC_API_KEY") temp_key = os.environ.get("ANTHROPIC_API_KEY")
os.environ["ANTHROPIC_API_KEY"] = "bad-key" os.environ["ANTHROPIC_API_KEY"] = "bad-key"
# test on openai completion call # test on openai completion call
try: try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn, api_key=temp_key) response = completion(
model="claude-instant-1",
messages=messages,
logger_fn=logger_fn,
api_key=temp_key,
)
print(f"response: {response}") print(f"response: {response}")
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
@ -33,7 +43,9 @@ litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
os.environ.pop("ANTHROPIC_API_KEY") os.environ.pop("ANTHROPIC_API_KEY")
# test on openai completion call # test on openai completion call
try: try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}") print(f"response: {response}")
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")


@ -5,17 +5,22 @@ import sys, os
import pytest import pytest
import traceback import traceback
import asyncio import asyncio
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm import acompletion from litellm import acompletion
async def test_get_response(): async def test_get_response():
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
try: try:
response = await acompletion(model="gpt-3.5-turbo", messages=messages) response = await acompletion(model="gpt-3.5-turbo", messages=messages)
except Exception as e: except Exception as e:
pytest.fail(f"error occurred: {e}") pytest.fail(f"error occurred: {e}")
return response return response
response = asyncio.run(test_get_response()) response = asyncio.run(test_get_response())
print(response) print(response)


@ -5,12 +5,13 @@
import sys, os import sys, os
import traceback import traceback
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
# Get the current directory of the script # Get the current directory of the script
current_dir = os.path.dirname(os.path.abspath(__file__)) current_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory by joining the current directory with '..' # Get the parent directory by joining the current directory with '..'
parent_dir = os.path.join(current_dir, '../..') parent_dir = os.path.join(current_dir, "../..")
# Add the parent directory to the system path # Add the parent directory to the system path
sys.path.append(parent_dir) sys.path.append(parent_dir)
@ -26,7 +27,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
model_val = None model_val = None
@ -39,7 +40,7 @@ def test_completion_with_empty_model():
pass pass
#bad key # bad key
temp_key = os.environ.get("OPENAI_API_KEY") temp_key = os.environ.get("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = "bad-key" os.environ["OPENAI_API_KEY"] = "bad-key"
# test on openai completion call # test on openai completion call


@ -3,7 +3,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import batch_completion from litellm import batch_completion


@ -1,9 +1,13 @@
import sys, os import sys, os
import traceback import traceback
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest import pytest
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -12,7 +16,6 @@ litellm.caching = True
messages = [{"role": "user", "content": "who is ishaan Github? "}] messages = [{"role": "user", "content": "who is ishaan Github? "}]
# test if response cached # test if response cached
def test_caching(): def test_caching():
try: try:
@ -29,7 +32,3 @@ def test_caching():
litellm.caching = False litellm.caching = False
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
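test_caching above toggles the module-level litellm.caching flag. A minimal sketch of the pattern it exercises, assuming the cache is keyed on an identical model/messages pair; the exact assertion used in the suppressed part of the test is not shown in this hunk:

# Editorial sketch, not part of the commit.
import litellm
from litellm import completion

litellm.caching = True
messages = [{"role": "user", "content": "who is ishaan Github? "}]

response1 = completion(model="gpt-3.5-turbo", messages=messages)
response2 = completion(model="gpt-3.5-turbo", messages=messages)
print(response1 == response2)  # the second call is expected to be served from the cache

litellm.caching = False  # reset, mirroring the cleanup path shown above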


@ -5,7 +5,9 @@ import sys, os
import traceback import traceback
import pytest import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -14,17 +16,22 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
litellm.set_verbose = True litellm.set_verbose = True
def logger_fn(model_call_object: dict): def logger_fn(model_call_object: dict):
# print(f"model call details: {model_call_object}") # print(f"model call details: {model_call_object}")
pass pass
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
def test_completion_openai(): def test_completion_openai():
try: try:
print("running query") print("running query")
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) response = completion(
model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}") print(f"response: {response}")
# Add any assertions here to check the response # Add any assertions here to check the response
except Exception as e: except Exception as e:
@ -34,33 +41,46 @@ def test_completion_openai():
def test_completion_claude(): def test_completion_claude():
try: try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response # Add any assertions here to check the response
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_non_openai(): def test_completion_non_openai():
try: try:
response = completion(model="command-nightly", messages=messages, logger_fn=logger_fn) response = completion(
model="command-nightly", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response # Add any assertions here to check the response
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_embedding_openai(): def test_embedding_openai():
try: try:
response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn) response = embedding(
model="text-embedding-ada-002", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(f"response: {str(response)[:50]}") print(f"response: {str(response)[:50]}")
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_bad_azure_embedding(): def test_bad_azure_embedding():
try: try:
response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn) response = embedding(
model="chatgpt-test", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(f"response: {str(response)[:50]}") print(f"response: {str(response)[:50]}")
except Exception as e: except Exception as e:
pass pass
# def test_good_azure_embedding(): # def test_good_azure_embedding():
# try: # try:
# response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn) # response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
@ -68,4 +88,3 @@ def test_bad_azure_embedding():
# print(f"response: {str(response)[:50]}") # print(f"response: {str(response)[:50]}")
# except Exception as e: # except Exception as e:
# pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")


@ -1,44 +1,58 @@
import sys, os import sys, os
import traceback import traceback
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest import pytest
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
# from infisical import InfisicalClient # from infisical import InfisicalClient
# litellm.set_verbose = True # litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) # litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
user_message = "Hello, whats the weather in San Francisco??" user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
def logger_fn(user_model_dict): def logger_fn(user_model_dict):
print(f"user_model_dict: {user_model_dict}") print(f"user_model_dict: {user_model_dict}")
def test_completion_claude(): def test_completion_claude():
try: try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_claude_stream(): def test_completion_claude_stream():
try: try:
messages = [ messages = [
{"role": "system", "content": "You are a helpful assistant."}, {"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"} {
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
] ]
response = completion(model="claude-2", messages=messages, stream=True) response = completion(model="claude-2", messages=messages, stream=True)
# Add any assertions here to check the response # Add any assertions here to check the response
for chunk in response: for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
# def test_completion_hf_api(): # def test_completion_hf_api():
# try: # try:
# user_message = "write some code to find the sum of two numbers" # user_message = "write some code to find the sum of two numbers"
@ -62,10 +76,12 @@ def test_completion_claude_stream():
def test_completion_cohere(): def test_completion_cohere():
try: try:
response = completion(model="command-nightly", messages=messages, max_tokens=100) response = completion(
model="command-nightly", messages=messages, max_tokens=100
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
response_str = response['choices'][0]['message']['content'] response_str = response["choices"][0]["message"]["content"]
print(f"str response{response_str}") print(f"str response{response_str}")
response_str_2 = response.choices[0].message.content response_str_2 = response.choices[0].message.content
if type(response_str) != str: if type(response_str) != str:
@ -75,24 +91,31 @@ def test_completion_cohere():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_cohere_stream(): def test_completion_cohere_stream():
try: try:
messages = [ messages = [
{"role": "system", "content": "You are a helpful assistant."}, {"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"} {
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
] ]
response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50) response = completion(
model="command-nightly", messages=messages, stream=True, max_tokens=50
)
# Add any assertions here to check the response # Add any assertions here to check the response
for chunk in response: for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_openai(): def test_completion_openai():
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages) response = completion(model="gpt-3.5-turbo", messages=messages)
response_str = response['choices'][0]['message']['content'] response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content response_str_2 = response.choices[0].message.content
assert response_str == response_str_2 assert response_str == response_str_2
assert type(response_str) == str assert type(response_str) == str
@ -100,6 +123,7 @@ def test_completion_openai():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_text_openai(): def test_completion_text_openai():
try: try:
response = completion(model="text-davinci-003", messages=messages) response = completion(model="text-davinci-003", messages=messages)
@ -108,17 +132,31 @@ def test_completion_text_openai():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_optional_params(): def test_completion_openai_with_optional_params():
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai") response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_openrouter(): def test_completion_openrouter():
try: try:
response = completion(model="google/palm-2-chat-bison", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai") response = completion(
model="google/palm-2-chat-bison",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
@ -127,12 +165,23 @@ def test_completion_openrouter():
def test_completion_openai_with_more_optional_params(): def test_completion_openai_with_more_optional_params():
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, frequency_penalty=-0.5, logit_bias={123: 5}, user="ishaan_dev@berri.ai") response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
frequency_penalty=-0.5,
logit_bias={123: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
response_str = response['choices'][0]['message']['content'] response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content response_str_2 = response.choices[0].message.content
print(response['choices'][0]['message']['content']) print(response["choices"][0]["message"]["content"])
print(response.choices[0].message.content) print(response.choices[0].message.content)
if type(response_str) != str: if type(response_str) != str:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -141,14 +190,28 @@ def test_completion_openai_with_more_optional_params():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream(): def test_completion_openai_with_stream():
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, stream=True, frequency_penalty=-0.5, logit_bias={27000: 5}, user="ishaan_dev@berri.ai") response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
stream=True,
frequency_penalty=-0.5,
logit_bias={27000: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_functions(): def test_completion_openai_with_functions():
function1 = [ function1 = [
{ {
@ -159,32 +222,38 @@ def test_completion_openai_with_functions():
"properties": { "properties": {
"location": { "location": {
"type": "string", "type": "string",
"description": "The city and state, e.g. San Francisco, CA" "description": "The city and state, e.g. San Francisco, CA",
}, },
"unit": { "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
"type": "string",
"enum": ["celsius", "fahrenheit"]
}
}, },
"required": ["location"] "required": ["location"],
} },
} }
] ]
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages, functions=function1) response = completion(
model="gpt-3.5-turbo", messages=messages, functions=function1
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_azure(): def test_completion_azure():
try: try:
response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure") response = completion(
model="gpt-3.5-turbo",
deployment_id="chatgpt-test",
messages=messages,
custom_llm_provider="azure",
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect. # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
def test_completion_replicate_llama_stream(): def test_completion_replicate_llama_stream():
model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
@ -197,23 +266,32 @@ def test_completion_replicate_llama_stream():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability_stream(): def test_completion_replicate_stability_stream():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try: try:
response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider="replicate") response = completion(
model=model_name,
messages=messages,
stream=True,
custom_llm_provider="replicate",
)
# Add any assertions here to check the response # Add any assertions here to check the response
for chunk in response: for chunk in response:
print(chunk['choices'][0]['delta']) print(chunk["choices"][0]["delta"])
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability(): def test_completion_replicate_stability():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try: try:
response = completion(model=model_name, messages=messages, custom_llm_provider="replicate") response = completion(
model=model_name, messages=messages, custom_llm_provider="replicate"
)
# Add any assertions here to check the response # Add any assertions here to check the response
response_str = response['choices'][0]['message']['content'] response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content response_str_2 = response.choices[0].message.content
print(response_str) print(response_str)
print(response_str_2) print(response_str_2)
@ -224,6 +302,7 @@ def test_completion_replicate_stability():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
######## Test TogetherAI ######## ######## Test TogetherAI ########
def test_completion_together_ai(): def test_completion_together_ai():
model_name = "togethercomputer/llama-2-70b-chat" model_name = "togethercomputer/llama-2-70b-chat"
@ -234,15 +313,22 @@ def test_completion_together_ai():
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_petals(): def test_petals():
model_name = "stabilityai/StableBeluga2" model_name = "stabilityai/StableBeluga2"
try: try:
response = completion(model=model_name, messages=messages, custom_llm_provider="petals", force_timeout=120) response = completion(
model=model_name,
messages=messages,
custom_llm_provider="petals",
force_timeout=120,
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
# def test_baseten_falcon_7bcompletion(): # def test_baseten_falcon_7bcompletion():
# model_name = "qvv0xeq" # model_name = "qvv0xeq"
# try: # try:
@ -290,7 +376,6 @@ def test_petals():
# pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
#### Test A121 ################### #### Test A121 ###################
# def test_completion_ai21(): # def test_completion_ai21():
# model_name = "j2-light" # model_name = "j2-light"
@ -333,4 +418,3 @@ def test_petals():
# return # return
# test_completion_together_ai_stream() # test_completion_together_ai_stream()


@ -1,20 +1,33 @@
import sys, os import sys, os
import traceback import traceback
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import completion from litellm import completion
def logging_fn(model_call_dict): def logging_fn(model_call_dict):
print(f"model call details: {model_call_dict}") print(f"model call details: {model_call_dict}")
models = ["gorilla-7b-hf-v1", "gpt-4"] models = ["gorilla-7b-hf-v1", "gpt-4"]
custom_llm_provider = None custom_llm_provider = None
messages = [{"role": "user", "content": "Hey, how's it going?"}] messages = [{"role": "user", "content": "Hey, how's it going?"}]
for model in models: # iterate through list for model in models: # iterate through list
custom_api_base = None custom_api_base = None
if model == "gorilla-7b-hf-v1": if model == "gorilla-7b-hf-v1":
custom_llm_provider = "custom_openai" custom_llm_provider = "custom_openai"
custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1" custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, logger_fn=logging_fn) completion(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
custom_api_base=custom_api_base,
logger_fn=logging_fn,
)


@ -1,9 +1,10 @@
import sys, os import sys, os
import traceback import traceback
import pytest import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
from infisical import InfisicalClient from infisical import InfisicalClient
@ -11,9 +12,12 @@ from infisical import InfisicalClient
# # litellm.set_verbose = True # # litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) # litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
def test_openai_embedding(): def test_openai_embedding():
try: try:
response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"]) response = embedding(
model="text-embedding-ada-002", input=["good morning from litellm"]
)
# Add any assertions here to check the response # Add any assertions here to check the response
print(f"response: {str(response)}") print(f"response: {str(response)}")
except Exception as e: except Exception as e:


@ -2,9 +2,20 @@
import os import os
import sys import sys
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion, AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError from litellm import (
embedding,
completion,
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import pytest import pytest
@ -23,6 +34,8 @@ litellm.failure_callback = ["sentry"]
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"] # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
test_model = "claude-instant-1" test_model = "claude-instant-1"
models = ["claude-instant-1"] models = ["claude-instant-1"]
def logging_fn(model_call_dict): def logging_fn(model_call_dict):
if "model" in model_call_dict: if "model" in model_call_dict:
print(f"model_call_dict: {model_call_dict['model']}") print(f"model_call_dict: {model_call_dict['model']}")
@ -38,7 +51,12 @@ def test_context_window(model):
try: try:
model = "chatgpt-test" model = "chatgpt-test"
print(f"model: {model}") print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn) response = completion(
model=model,
messages=messages,
custom_llm_provider="azure",
logger_fn=logging_fn,
)
print(f"response: {response}") print(f"response: {response}")
except InvalidRequestError as e: except InvalidRequestError as e:
print(f"InvalidRequestError: {e.llm_provider}") print(f"InvalidRequestError: {e.llm_provider}")
@ -52,12 +70,15 @@ def test_context_window(model):
print(f"Uncaught Exception - {e}") print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
return return
test_context_window(test_model) test_context_window(test_model)
# Test 2: InvalidAuth Errors # Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models) @pytest.mark.parametrize("model", models)
def invalid_auth(model): # set the model key to an invalid key, depending on the model def invalid_auth(model): # set the model key to an invalid key, depending on the model
messages = [{ "content": "Hello, how are you?","role": "user"}] messages = [{"content": "Hello, how are you?", "role": "user"}]
temporary_key = None temporary_key = None
try: try:
custom_llm_provider = None custom_llm_provider = None
@ -74,22 +95,29 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
elif model == "command-nightly": elif model == "command-nightly":
temporary_key = os.environ["COHERE_API_KEY"] temporary_key = os.environ["COHERE_API_KEY"]
os.environ["COHERE_API_KEY"] = "bad-key" os.environ["COHERE_API_KEY"] = "bad-key"
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
temporary_key = os.environ["REPLICATE_API_KEY"] temporary_key = os.environ["REPLICATE_API_KEY"]
os.environ["REPLICATE_API_KEY"] = "bad-key" os.environ["REPLICATE_API_KEY"] = "bad-key"
print(f"model: {model}") print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider) response = completion(
model=model, messages=messages, custom_llm_provider=custom_llm_provider
)
print(f"response: {response}") print(f"response: {response}")
except AuthenticationError as e: except AuthenticationError as e:
print(f"AuthenticationError Caught Exception - {e.llm_provider}") print(f"AuthenticationError Caught Exception - {e.llm_provider}")
except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server except (
OpenAIError
): # is at least an openai error -> in case of random model errors - e.g. overloaded server
print(f"OpenAIError Caught Exception - {e}") print(f"OpenAIError Caught Exception - {e}")
except Exception as e: except Exception as e:
print(type(e)) print(type(e))
print(e.__class__.__name__) print(e.__class__.__name__)
print(f"Uncaught Exception - {e}") print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
if temporary_key != None: # reset the key if temporary_key != None: # reset the key
if model == "gpt-3.5-turbo": if model == "gpt-3.5-turbo":
os.environ["OPENAI_API_KEY"] = temporary_key os.environ["OPENAI_API_KEY"] = temporary_key
elif model == "chatgpt-test": elif model == "chatgpt-test":
@ -99,9 +127,14 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
os.environ["ANTHROPIC_API_KEY"] = temporary_key os.environ["ANTHROPIC_API_KEY"] = temporary_key
elif model == "command-nightly": elif model == "command-nightly":
os.environ["COHERE_API_KEY"] = temporary_key os.environ["COHERE_API_KEY"] = temporary_key
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
os.environ["REPLICATE_API_KEY"] = temporary_key os.environ["REPLICATE_API_KEY"] = temporary_key
return return
invalid_auth(test_model) invalid_auth(test_model)
# # Test 3: Rate Limit Errors # # Test 3: Rate Limit Errors
# def test_model(model): # def test_model(model):
@ -142,5 +175,3 @@ invalid_auth(test_model)
# accuracy_score = counts[True]/(counts[True] + counts[False]) # accuracy_score = counts[True]/(counts[True] + counts[False])
# print(f"accuracy_score: {accuracy_score}") # print(f"accuracy_score: {accuracy_score}")


@ -5,7 +5,9 @@ import sys, os
import traceback import traceback
import pytest import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -14,11 +16,15 @@ litellm.success_callback = ["helicone"]
litellm.set_verbose = True litellm.set_verbose = True
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
#openai call # openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) response = completion(
model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
)
#cohere call # cohere call
response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]) response = completion(
model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]
)


@ -1,6 +1,9 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import load_test_model, testing_batch_completion from litellm import load_test_model, testing_batch_completion
@ -16,7 +19,19 @@ from litellm import load_test_model, testing_batch_completion
# print(result) # print(result)
## Quality Test across Model ## Quality Test across Model
models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}] models = [
messages = [[{"role": "user", "content": "What is your name?"}], [{"role": "user", "content": "Hey, how's it going?"}]] "gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
"claude-instant-1",
{
"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
"custom_llm_provider": "replicate",
},
]
messages = [
[{"role": "user", "content": "What is your name?"}],
[{"role": "user", "content": "Hey, how's it going?"}],
]
result = testing_batch_completion(models=models, messages=messages) result = testing_batch_completion(models=models, messages=messages)
print(result) print(result)
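The result printed above is, per the testing_batch_completion implementation earlier in this commit, a flat list of per-call dicts. A small sketch of post-processing it, using only the field names from that implementation:

# Editorial sketch, not part of the commit.
for entry in result:
    if entry["status"] == "succeeded":
        print(entry["response_time"], entry["prompt"])
    else:  # "failed" entries carry the raised exception instead of a prompt
        print("failed:", entry["response"])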


@ -3,7 +3,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -11,25 +14,29 @@ litellm.set_verbose = False
score = 0 score = 0
def logger_fn(model_call_object: dict): def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}") print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
# test on openai completion call # test on openai completion call
try: try:
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
score +=1 score += 1
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pass pass
# test on non-openai completion call # test on non-openai completion call
try: try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"claude response: {response}") print(f"claude response: {response}")
score +=1 score += 1
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pass pass


@ -3,7 +3,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -15,7 +18,7 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"] model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list: for model in model_fallback_list:
try: try:


@ -4,7 +4,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
@ -13,7 +16,7 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"] model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list: for model in model_fallback_list:
try: try:


@ -53,7 +53,6 @@
# # # return this generator to the client for streaming requests # # # return this generator to the client for streaming requests
# # async def get_response(): # # async def get_response():
# # global generator # # global generator
# # async for elem in generator: # # async for elem in generator:


@ -12,7 +12,6 @@
# import asyncio # import asyncio
# user_message = "respond in 20 words. who are you?" # user_message = "respond in 20 words. who are you?"
# messages = [{ "content": user_message,"role": "user"}] # messages = [{ "content": user_message,"role": "user"}]
@ -45,8 +44,3 @@
# pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
# test_completion_ollama_stream() # test_completion_ollama_stream()


@ -4,7 +4,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import embedding, completion from litellm import embedding, completion
from infisical import InfisicalClient from infisical import InfisicalClient
@ -15,7 +18,7 @@ infisical_token = os.environ["INFISICAL_TOKEN"]
litellm.secret_manager_client = InfisicalClient(token=infisical_token) litellm.secret_manager_client = InfisicalClient(token=infisical_token)
user_message = "Hello, whats the weather in San Francisco??" user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
def test_completion_openai(): def test_completion_openai():
@ -28,5 +31,5 @@ def test_completion_openai():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
litellm.secret_manager_client = None litellm.secret_manager_client = None
test_completion_openai()
test_completion_openai()


@ -3,7 +3,10 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm import litellm
from litellm import completion from litellm import completion
@ -11,18 +14,22 @@ litellm.set_verbose = False
score = 0 score = 0
def logger_fn(model_call_object: dict): def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}") print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?" user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] messages = [{"content": user_message, "role": "user"}]
# test on anthropic completion call # test on anthropic completion call
try: try:
response = completion(model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn) response = completion(
model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
)
for chunk in response: for chunk in response:
print(chunk['choices'][0]['delta']) print(chunk["choices"][0]["delta"])
score +=1 score += 1
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pass pass
@ -30,10 +37,17 @@ except:
# test on anthropic completion call # test on anthropic completion call
try: try:
response = completion(model="meta-llama/Llama-2-7b-chat-hf", messages=messages, custom_llm_provider="huggingface", custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud", stream=True, logger_fn=logger_fn) response = completion(
model="meta-llama/Llama-2-7b-chat-hf",
messages=messages,
custom_llm_provider="huggingface",
custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud",
stream=True,
logger_fn=logger_fn,
)
for chunk in response: for chunk in response:
print(chunk['choices'][0]['delta']) print(chunk["choices"][0]["delta"])
score +=1 score += 1
except: except:
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pass pass


@ -3,10 +3,14 @@
import sys, os import sys, os
import traceback import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import time import time
from litellm import timeout from litellm import timeout
@timeout(10) @timeout(10)
def stop_after_10_s(force_timeout=60): def stop_after_10_s(force_timeout=60):
print("Stopping after 10 seconds") print("Stopping after 10 seconds")
@ -17,10 +21,10 @@ def stop_after_10_s(force_timeout=60):
start_time = time.time() start_time = time.time()
try: try:
stop_after_10_s(force_timeout=1) stop_after_10_s(force_timeout=1)
except Exception as e: except Exception as e:
print(e) print(e)
pass pass
end_time = time.time() end_time = time.time()


@ -11,9 +11,7 @@ from threading import Thread
from openai.error import Timeout from openai.error import Timeout
def timeout( def timeout(timeout_duration: float = None, exception_to_raise=Timeout):
timeout_duration: float = None, exception_to_raise = Timeout
):
""" """
Wraps a function to raise the specified exception if execution time Wraps a function to raise the specified exception if execution time
is greater than the specified timeout. is greater than the specified timeout.
@ -44,7 +42,9 @@ def timeout(
result = future.result(timeout=local_timeout_duration) result = future.result(timeout=local_timeout_duration)
except futures.TimeoutError: except futures.TimeoutError:
thread.stop_loop() thread.stop_loop()
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).") raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
thread.stop_loop() thread.stop_loop()
return result return result
@ -59,7 +59,9 @@ def timeout(
) )
return value return value
except asyncio.TimeoutError: except asyncio.TimeoutError:
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).") raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
if iscoroutinefunction(func): if iscoroutinefunction(func):
return async_wrapper return async_wrapper
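The timeout decorator reformatted above raises openai.error.Timeout by default once a call exceeds its budget, and the test file earlier in this commit overrides that budget per call with force_timeout. A minimal sketch of that behaviour, assuming the decorator is re-exported as litellm.timeout (as that test imports it) and that force_timeout overrides the decorator's budget at call time, as the test exercises:

# Editorial sketch, not part of the commit.
import time
from openai.error import Timeout
from litellm import timeout

@timeout(10)  # default budget of 10 seconds
def slow_call(force_timeout=60):
    time.sleep(10)
    return "finished"

try:
    slow_call(force_timeout=1)  # shrink the budget to 1 second for this call
except Timeout as e:
    print(e)  # e.g. "A timeout error occurred. The function call took longer than 1 second(s)."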

File diff suppressed because it is too large