forked from phoenix/litellm-mirror

Commit: add linting
Commit 15b1da9dc8 (parent 8ef47524bf)
40 changed files with 3110 additions and 1709 deletions
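The reformatting throughout this commit matches the default output of an opinionated Python formatter; the commit message only says "add linting", so black is an assumption here. A minimal sketch of how such a formatter might have been run over the package before committing:

    # Hypothetical invocation -- the commit does not name the formatter/linter actually used.
    import subprocess

    subprocess.run(["black", "litellm/"], check=True)  # rewrite files in place with black's default style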
@@ -1,4 +1,5 @@
 import threading
 success_callback = []
 failure_callback = []
 set_verbose = False
@@ -19,33 +20,99 @@ caching = False
 hugging_api_token = None
 togetherai_api_key = None
 model_cost = {
-    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
-    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
-    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
-    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
-    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
-    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
-    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
-    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
-    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
-    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
-    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
-    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
-    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
-    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
-    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+    "gpt-3.5-turbo": {
+        "max_tokens": 4000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000002,
+    },
+    "gpt-35-turbo": {
+        "max_tokens": 4000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000002,
+    },  # azure model name
+    "gpt-3.5-turbo-0613": {
+        "max_tokens": 4000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000002,
+    },
+    "gpt-3.5-turbo-0301": {
+        "max_tokens": 4000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000002,
+    },
+    "gpt-3.5-turbo-16k": {
+        "max_tokens": 16000,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000004,
+    },
+    "gpt-35-turbo-16k": {
+        "max_tokens": 16000,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000004,
+    },  # azure model name
+    "gpt-3.5-turbo-16k-0613": {
+        "max_tokens": 16000,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000004,
+    },
+    "gpt-4": {
+        "max_tokens": 8000,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.00006,
+    },
+    "gpt-4-0613": {
+        "max_tokens": 8000,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.00006,
+    },
+    "gpt-4-32k": {
+        "max_tokens": 8000,
+        "input_cost_per_token": 0.00006,
+        "output_cost_per_token": 0.00012,
+    },
+    "claude-instant-1": {
+        "max_tokens": 100000,
+        "input_cost_per_token": 0.00000163,
+        "output_cost_per_token": 0.00000551,
+    },
+    "claude-2": {
+        "max_tokens": 100000,
+        "input_cost_per_token": 0.00001102,
+        "output_cost_per_token": 0.00003268,
+    },
+    "text-bison-001": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000004,
+    },
+    "chat-bison-001": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000002,
+    },
+    "command-nightly": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000015,
+    },
 }


 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
     def __init__(self):
         self.user = "Hello World"


 _thread_context = MyLocal()


 def identify(event_details):
     # Store user in thread local data
     if "user" in event_details:
         _thread_context.user = event_details["user"]


 ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
 api_base = None
 headers = None
@@ -66,50 +133,38 @@ open_ai_chat_completion_models = [
     "gpt-3.5-turbo-0613",
     "gpt-3.5-turbo-16k-0613",
 ]
-open_ai_text_completion_models = [
-    'text-davinci-003'
-]
+open_ai_text_completion_models = ["text-davinci-003"]

 cohere_models = [
-    'command-nightly',
+    "command-nightly",
     "command",
     "command-light",
     "command-medium-beta",
-    "command-xlarge-beta"
+    "command-xlarge-beta",
 ]

-anthropic_models = [
-    "claude-2",
-    "claude-instant-1",
-    "claude-instant-1.2"
-]
+anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]

 replicate_models = [
     "replicate/"
 ] # placeholder, to make sure we accept any replicate model in our model_list

 openrouter_models = [
-    'google/palm-2-codechat-bison',
-    'google/palm-2-chat-bison',
-    'openai/gpt-3.5-turbo',
-    'openai/gpt-3.5-turbo-16k',
-    'openai/gpt-4-32k',
-    'anthropic/claude-2',
-    'anthropic/claude-instant-v1',
-    'meta-llama/llama-2-13b-chat',
-    'meta-llama/llama-2-70b-chat'
+    "google/palm-2-codechat-bison",
+    "google/palm-2-chat-bison",
+    "openai/gpt-3.5-turbo",
+    "openai/gpt-3.5-turbo-16k",
+    "openai/gpt-4-32k",
+    "anthropic/claude-2",
+    "anthropic/claude-instant-v1",
+    "meta-llama/llama-2-13b-chat",
+    "meta-llama/llama-2-70b-chat",
 ]

-vertex_chat_models = [
-    "chat-bison",
-    "chat-bison@001"
-]
+vertex_chat_models = ["chat-bison", "chat-bison@001"]

-vertex_text_models = [
-    "text-bison",
-    "text-bison@001"
-]
+vertex_text_models = ["text-bison", "text-bison@001"]

 huggingface_models = [
     "meta-llama/Llama-2-7b-hf",
@@ -126,23 +181,54 @@ huggingface_models = [
     "meta-llama/Llama-2-70b-chat",
 ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported

-ai21_models = [
-    "j2-ultra",
-    "j2-mid",
-    "j2-light"
-]
+ai21_models = ["j2-ultra", "j2-mid", "j2-light"]

-model_list = open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models + replicate_models + openrouter_models + huggingface_models + vertex_chat_models + vertex_text_models + ai21_models
+model_list = (
+    open_ai_chat_completion_models
+    + open_ai_text_completion_models
+    + cohere_models
+    + anthropic_models
+    + replicate_models
+    + openrouter_models
+    + huggingface_models
+    + vertex_chat_models
+    + vertex_text_models
+    + ai21_models
+)

-provider_list = ["openai", "cohere", "anthropic", "replicate", "huggingface", "together_ai", "openrouter", "vertex_ai", "ai21"]
+provider_list = [
+    "openai",
+    "cohere",
+    "anthropic",
+    "replicate",
+    "huggingface",
+    "together_ai",
+    "openrouter",
+    "vertex_ai",
+    "ai21",
+]

 ####### EMBEDDING MODELS ###################
-open_ai_embedding_models = [
-    'text-embedding-ada-002'
-]
+open_ai_embedding_models = ["text-embedding-ada-002"]

 from .timeout import timeout
 from .testing import *
-from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, get_litellm_params
+from .utils import (
+    client,
+    logging,
+    exception_type,
+    get_optional_params,
+    modify_integration,
+    token_counter,
+    cost_per_token,
+    completion_cost,
+    get_litellm_params,
+)
 from .main import * # Import all the symbols from main.py
 from .integrations import *
-from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
+from openai.error import (
+    AuthenticationError,
+    InvalidRequestError,
+    RateLimitError,
+    ServiceUnavailableError,
+    OpenAIError,
+)
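The cost map above feeds the token-pricing logic that appears in the price_calculator methods later in this diff. A minimal sketch of that calculation, using one entry from model_cost and a made-up usage payload (the numbers are illustrative only):

    # Illustrative only: the usage counts are invented; the cost values mirror model_cost above.
    model_cost = {
        "gpt-3.5-turbo": {
            "max_tokens": 4000,
            "input_cost_per_token": 0.0000015,
            "output_cost_per_token": 0.000002,
        },
    }
    usage = {"prompt_tokens": 1200, "completion_tokens": 300}
    model = "gpt-3.5-turbo"
    prompt_cost = model_cost[model]["input_cost_per_token"] * usage["prompt_tokens"]            # 0.0018
    completion_cost = model_cost[model]["output_cost_per_token"] * usage["completion_tokens"]   # 0.0006
    total_cost_usd = prompt_cost + completion_cost                                              # 0.0024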
@@ -1,12 +1,21 @@
 ## LiteLLM versions of the OpenAI Exception Types
-from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
+from openai.error import (
+    AuthenticationError,
+    InvalidRequestError,
+    RateLimitError,
+    ServiceUnavailableError,
+    OpenAIError,
+)


 class AuthenticationError(AuthenticationError):
     def __init__(self, message, llm_provider):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
-        super().__init__(self.message) # Call the base class constructor with the parameters it needs
+        super().__init__(
+            self.message
+        ) # Call the base class constructor with the parameters it needs


 class InvalidRequestError(InvalidRequestError):
@@ -15,7 +24,9 @@ class InvalidRequestError(InvalidRequestError):
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
-        super().__init__(self.message, f"{self.model}") # Call the base class constructor with the parameters it needs
+        super().__init__(
+            self.message, f"{self.model}"
+        ) # Call the base class constructor with the parameters it needs


 class RateLimitError(RateLimitError):
@@ -23,21 +34,29 @@ class RateLimitError(RateLimitError):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
-        super().__init__(self.message) # Call the base class constructor with the parameters it needs
+        super().__init__(
+            self.message
+        ) # Call the base class constructor with the parameters it needs


 class ServiceUnavailableError(ServiceUnavailableError):
     def __init__(self, message, llm_provider):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
-        super().__init__(self.message) # Call the base class constructor with the parameters it needs
+        super().__init__(
+            self.message
+        ) # Call the base class constructor with the parameters it needs


 class OpenAIError(OpenAIError):
     def __init__(self, original_exception):
         self.status_code = original_exception.http_status
-        super().__init__(http_body=original_exception.http_body,
+        super().__init__(
+            http_body=original_exception.http_body,
             http_status=original_exception.http_status,
             json_body=original_exception.json_body,
             headers=original_exception.headers,
-            code=original_exception.code)
+            code=original_exception.code,
+        )
         self.llm_provider = "openai"
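These wrappers keep the OpenAI exception hierarchy but attach a status_code and the originating llm_provider. A short sketch of how they might be raised and inspected (the import path is an assumption based on this diff):

    # Sketch only; assumes the classes above live in litellm's exceptions module.
    from litellm.exceptions import AuthenticationError

    try:
        raise AuthenticationError(message="invalid api key", llm_provider="anthropic")
    except AuthenticationError as e:
        print(e.status_code, e.llm_provider, e.message)  # 401 anthropic invalid api key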
@@ -2,28 +2,90 @@
 # On success + failure, log events to aispend.io
 import dotenv, os
 import requests

 dotenv.load_dotenv() # Loading env variables using dotenv
 import traceback
 import datetime

 model_cost = {
     # same 15-model cost map as defined above (gpt-3.5-turbo through command-nightly),
     # with identical values, reformatted from one line per model to one key per line
 }


 class AISpendLogger:
     # Class variables or attributes
     def __init__(self):
@@ -37,8 +99,14 @@ class AISpendLogger:
         prompt_tokens_cost_usd_dollar = 0
         completion_tokens_cost_usd_dollar = 0
         if model in model_cost:
-            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+            prompt_tokens_cost_usd_dollar = (
+                model_cost[model]["input_cost_per_token"]
+                * response_obj["usage"]["prompt_tokens"]
+            )
+            completion_tokens_cost_usd_dollar = (
+                model_cost[model]["output_cost_per_token"]
+                * response_obj["usage"]["completion_tokens"]
+            )
         elif "replicate" in model:
             # replicate models are charged based on time
             # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
@@ -55,27 +123,41 @@ class AISpendLogger:
             output_cost_sum += model_cost[model]["output_cost_per_token"]
         avg_input_cost = input_cost_sum / len(model_cost.keys())
         avg_output_cost = output_cost_sum / len(model_cost.keys())
-        prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-        completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+        prompt_tokens_cost_usd_dollar = (
+            model_cost[model]["input_cost_per_token"]
+            * response_obj["usage"]["prompt_tokens"]
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost[model]["output_cost_per_token"]
+            * response_obj["usage"]["completion_tokens"]
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

     def log_event(self, model, response_obj, start_time, end_time, print_verbose):
         # Method definition
         try:
-            print_verbose(f"AISpend Logging - Enters logging function for model {model}")
+            print_verbose(
+                f"AISpend Logging - Enters logging function for model {model}"
+            )

             url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
             headers = {
-                'Authorization': f'Bearer {self.api_key}',
-                'Content-Type': 'application/json'
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
             }

-            response_timestamp = datetime.datetime.fromtimestamp(int(response_obj["created"])).strftime('%Y-%m-%d')
+            response_timestamp = datetime.datetime.fromtimestamp(
+                int(response_obj["created"])
+            ).strftime("%Y-%m-%d")

-            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
+            (
+                prompt_tokens_cost_usd_dollar,
+                completion_tokens_cost_usd_dollar,
+            ) = self.price_calculator(model, response_obj, start_time, end_time)
             prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
             completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
-            data = [{
+            data = [
+                {
                     "requests": 1,
                     "requests_context": 1,
                     "context_tokens": response_obj["usage"]["prompt_tokens"],
@@ -84,8 +166,9 @@ class AISpendLogger:
                     "recorded_date": response_timestamp,
                     "model_id": response_obj["model"],
                     "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
-                    "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent
-            }]
+                    "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
+                }
+            ]

             print_verbose(f"AISpend Logging - final data object: {data}")
         except:
@@ -2,28 +2,90 @@
 # On success + failure, log events to aispend.io
 import dotenv, os
 import requests

 dotenv.load_dotenv() # Loading env variables using dotenv
 import traceback
 import datetime

 model_cost = {
     # same 15-model cost map as defined above (gpt-3.5-turbo through command-nightly),
     # with identical values, reformatted from one line per model to one key per line
 }


 class BerriSpendLogger:
     # Class variables or attributes
     def __init__(self):
@@ -36,8 +98,14 @@ class BerriSpendLogger:
         prompt_tokens_cost_usd_dollar = 0
         completion_tokens_cost_usd_dollar = 0
         if model in model_cost:
-            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+            prompt_tokens_cost_usd_dollar = (
+                model_cost[model]["input_cost_per_token"]
+                * response_obj["usage"]["prompt_tokens"]
+            )
+            completion_tokens_cost_usd_dollar = (
+                model_cost[model]["output_cost_per_token"]
+                * response_obj["usage"]["completion_tokens"]
+            )
         elif "replicate" in model:
             # replicate models are charged based on time
             # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
@@ -54,42 +122,59 @@ class BerriSpendLogger:
             output_cost_sum += model_cost[model]["output_cost_per_token"]
         avg_input_cost = input_cost_sum / len(model_cost.keys())
         avg_output_cost = output_cost_sum / len(model_cost.keys())
-        prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-        completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+        prompt_tokens_cost_usd_dollar = (
+            model_cost[model]["input_cost_per_token"]
+            * response_obj["usage"]["prompt_tokens"]
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost[model]["output_cost_per_token"]
+            * response_obj["usage"]["completion_tokens"]
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

-    def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
+    def log_event(
+        self, model, messages, response_obj, start_time, end_time, print_verbose
+    ):
         # Method definition
         try:
-            print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")
+            print_verbose(
+                f"BerriSpend Logging - Enters logging function for model {model}"
+            )

             url = f"https://berrispend.berri.ai/spend"
-            headers = {
-                'Content-Type': 'application/json'
-            }
+            headers = {"Content-Type": "application/json"}

-            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
-            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+            (
+                prompt_tokens_cost_usd_dollar,
+                completion_tokens_cost_usd_dollar,
+            ) = self.price_calculator(model, response_obj, start_time, end_time)
+            total_cost = (
+                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+            )

             response_time = (end_time - start_time).total_seconds()
             if "response" in response_obj:
-                data = [{
+                data = [
+                    {
                         "response_time": response_time,
                         "model_id": response_obj["model"],
                         "total_cost": total_cost,
                         "messages": messages,
-                        "response": response_obj['choices'][0]['message']['content'],
-                        "account_id": self.account_id
-                }]
+                        "response": response_obj["choices"][0]["message"]["content"],
+                        "account_id": self.account_id,
+                    }
+                ]
             elif "error" in response_obj:
-                data = [{
+                data = [
+                    {
                         "response_time": response_time,
                         "model_id": response_obj["model"],
                         "total_cost": total_cost,
                         "messages": messages,
-                        "error": response_obj['error'],
-                        "account_id": self.account_id
-                }]
+                        "error": response_obj["error"],
+                        "account_id": self.account_id,
+                    }
+                ]

             print_verbose(f"BerriSpend Logging - final data object: {data}")
             response = requests.post(url, headers=headers, json=data)
@@ -2,18 +2,23 @@
 # On success, logs events to Helicone
 import dotenv, os
 import requests

 dotenv.load_dotenv() # Loading env variables using dotenv
 import traceback


 class HeliconeLogger:
     # Class variables or attributes
     helicone_model_list = ["gpt", "claude"]

     def __init__(self):
         # Instance variables
         self.provider_url = "https://api.openai.com/v1"
-        self.key = os.getenv('HELICONE_API_KEY')
+        self.key = os.getenv("HELICONE_API_KEY")

     def claude_mapping(self, model, messages, response_obj):
         from anthropic import HUMAN_PROMPT, AI_PROMPT

         prompt = f"{HUMAN_PROMPT}"
         for message in messages:
             if "role" in message:
@@ -26,46 +31,82 @@ class HeliconeLogger:
         prompt += f"{AI_PROMPT}"
         claude_provider_request = {"model": model, "prompt": prompt}

-        claude_response_obj = {"completion": response_obj['choices'][0]['message']['content'], "model": model, "stop_reason": "stop_sequence"}
+        claude_response_obj = {
+            "completion": response_obj["choices"][0]["message"]["content"],
+            "model": model,
+            "stop_reason": "stop_sequence",
+        }

         return claude_provider_request, claude_response_obj

-    def log_success(self, model, messages, response_obj, start_time, end_time, print_verbose):
+    def log_success(
+        self, model, messages, response_obj, start_time, end_time, print_verbose
+    ):
         # Method definition
         try:
-            print_verbose(f"Helicone Logging - Enters logging function for model {model}")
-            model = model if any(accepted_model in model for accepted_model in self.helicone_model_list) else "gpt-3.5-turbo"
+            print_verbose(
+                f"Helicone Logging - Enters logging function for model {model}"
+            )
+            model = (
+                model
+                if any(
+                    accepted_model in model
+                    for accepted_model in self.helicone_model_list
+                )
+                else "gpt-3.5-turbo"
+            )
             provider_request = {"model": model, "messages": messages}

             if "claude" in model:
-                provider_request, response_obj = self.claude_mapping(model=model, messages=messages, response_obj=response_obj)
+                provider_request, response_obj = self.claude_mapping(
+                    model=model, messages=messages, response_obj=response_obj
+                )

             providerResponse = {
                 "json": response_obj,
                 "headers": {"openai-version": "2020-10-01"},
-                "status": 200
+                "status": 200,
             }

             # Code to be executed
             url = "https://api.hconeai.com/oai/v1/log"
             headers = {
-                'Authorization': f'Bearer {self.key}',
-                'Content-Type': 'application/json'
+                "Authorization": f"Bearer {self.key}",
+                "Content-Type": "application/json",
             }
             start_time_seconds = int(start_time.timestamp())
-            start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
+            start_time_milliseconds = int(
+                (start_time.timestamp() - start_time_seconds) * 1000
+            )
             end_time_seconds = int(end_time.timestamp())
-            end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)
+            end_time_milliseconds = int(
+                (end_time.timestamp() - end_time_seconds) * 1000
+            )
             data = {
-                "providerRequest": {"url": self.provider_url, "json": provider_request, "meta": {"Helicone-Auth": f"Bearer {self.key}"}},
+                "providerRequest": {
+                    "url": self.provider_url,
+                    "json": provider_request,
+                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                },
                 "providerResponse": providerResponse,
-                "timing": {"startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds}, "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds}} # {"seconds": .., "milliseconds": ..}
+                "timing": {
+                    "startTime": {
+                        "seconds": start_time_seconds,
+                        "milliseconds": start_time_milliseconds,
+                    },
+                    "endTime": {
+                        "seconds": end_time_seconds,
+                        "milliseconds": end_time_milliseconds,
+                    },
+                }, # {"seconds": .., "milliseconds": ..}
             }
             response = requests.post(url, headers=headers, json=data)
             if response.status_code == 200:
                 print_verbose("Helicone Logging - Success!")
             else:
-                print_verbose(f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}")
+                print_verbose(
+                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
+                )
                 print_verbose(f"Helicone Logging - Error {response.text}")
         except:
             # traceback.print_exc()
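The Helicone payload above splits each timestamp into whole seconds plus a millisecond remainder. A small standalone sketch of that conversion:

    import datetime

    start_time = datetime.datetime.now()
    seconds = int(start_time.timestamp())
    milliseconds = int((start_time.timestamp() - seconds) * 1000)
    # the logger above sends {"seconds": seconds, "milliseconds": milliseconds}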
@@ -3,31 +3,94 @@

 import dotenv, os
 import requests

 dotenv.load_dotenv() # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys

 model_cost = {
     # same 15-model cost map as defined above (gpt-3.5-turbo through command-nightly),
     # with identical values, reformatted from one line per model to one key per line
 }


 class Supabase:
     # Class variables or attributes
     supabase_table_name = "request_logs"

     def __init__(self):
         # Instance variables
         self.supabase_url = os.getenv("SUPABASE_URL")
@@ -35,9 +98,11 @@ class Supabase:
         try:
             import supabase
         except ImportError:
-            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
+            subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
             import supabase
-        self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)
+        self.supabase_client = supabase.create_client(
+            self.supabase_url, self.supabase_key
+        )

     def price_calculator(self, model, response_obj, start_time, end_time):
         # try and find if the model is in the model_cost map
@@ -45,8 +110,14 @@ class Supabase:
         prompt_tokens_cost_usd_dollar = 0
         completion_tokens_cost_usd_dollar = 0
         if model in model_cost:
-            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+            prompt_tokens_cost_usd_dollar = (
+                model_cost[model]["input_cost_per_token"]
+                * response_obj["usage"]["prompt_tokens"]
+            )
+            completion_tokens_cost_usd_dollar = (
+                model_cost[model]["output_cost_per_token"]
+                * response_obj["usage"]["completion_tokens"]
+            )
         elif "replicate" in model:
             # replicate models are charged based on time
             # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
@@ -63,16 +134,38 @@ class Supabase:
             output_cost_sum += model_cost[model]["output_cost_per_token"]
         avg_input_cost = input_cost_sum / len(model_cost.keys())
         avg_output_cost = output_cost_sum / len(model_cost.keys())
-        prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
-        completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+        prompt_tokens_cost_usd_dollar = (
+            model_cost[model]["input_cost_per_token"]
+            * response_obj["usage"]["prompt_tokens"]
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost[model]["output_cost_per_token"]
+            * response_obj["usage"]["completion_tokens"]
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

-    def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
+    def log_event(
+        self,
+        model,
+        messages,
+        end_user,
+        response_obj,
+        start_time,
+        end_time,
+        print_verbose,
+    ):
         try:
-            print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
+            print_verbose(
+                f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
+            )

-            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
-            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+            (
+                prompt_tokens_cost_usd_dollar,
+                completion_tokens_cost_usd_dollar,
+            ) = self.price_calculator(model, response_obj, start_time, end_time)
+            total_cost = (
+                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+            )

             response_time = (end_time - start_time).total_seconds()
             if "choices" in response_obj:
@@ -81,22 +174,34 @@ class Supabase:
                     "model": response_obj["model"],
                     "total_cost": total_cost,
                     "messages": messages,
-                    "response": response_obj['choices'][0]['message']['content'],
-                    "end_user": end_user
+                    "response": response_obj["choices"][0]["message"]["content"],
+                    "end_user": end_user,
                 }
-                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
-                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
+                print_verbose(
+                    f"Supabase Logging - final data object: {supabase_data_obj}"
+                )
+                data, count = (
+                    self.supabase_client.table(self.supabase_table_name)
+                    .insert(supabase_data_obj)
+                    .execute()
+                )
             elif "error" in response_obj:
                 supabase_data_obj = {
                     "response_time": response_time,
                     "model": response_obj["model"],
                     "total_cost": total_cost,
                     "messages": messages,
-                    "error": response_obj['error'],
-                    "end_user": end_user
+                    "error": response_obj["error"],
+                    "end_user": end_user,
                 }
-                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
-                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
+                print_verbose(
+                    f"Supabase Logging - final data object: {supabase_data_obj}"
+                )
+                data, count = (
+                    self.supabase_client.table(self.supabase_table_name)
+                    .insert(supabase_data_obj)
+                    .execute()
+                )

         except:
             # traceback.print_exc()
@@ -6,18 +6,22 @@ import time
from typing import Callable
from litellm.utils import ModelResponse


class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman:"
    AI_PROMPT = "\n\nAssistant:"


class AnthropicError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AnthropicLLM:
    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
@@ -25,31 +29,50 @@ class AnthropicLLM:
        self.api_key = api_key
        self.validate_environment(api_key=api_key)

    def validate_environment(
        self, api_key
    ):  # set up the environment required to run the model
        # set the api key
        if self.api_key == None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        self.api_key = api_key
        self.headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": self.api_key,
        }

    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        model = model
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
                    prompt += (
                        f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
                    )
                else:
                    prompt += (
                        f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
                    )
            else:
                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
        if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
            "inf"
        ):
            max_tokens = optional_params["max_tokens"]
        else:
            max_tokens = self.default_max_tokens_to_sample
@@ -57,37 +80,64 @@ class AnthropicLLM:
            "model": model,
            "prompt": prompt,
            "max_tokens_to_sample": max_tokens,
            **optional_params,
        }

        ## LOGGING
        logging(
            model=model,
            input=prompt,
            additional_args={
                "litellm_params": litellm_params,
                "optional_params": optional_params,
            },
            logger_fn=logger_fn,
        )
        ## COMPLETION CALL
        response = requests.post(
            self.completion_url, headers=self.headers, data=json.dumps(data)
        )
        if "stream" in optional_params and optional_params["stream"] == True:
            return response.iter_lines()
        else:
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                additional_args={
                    "litellm_params": litellm_params,
                    "optional_params": optional_params,
                    "original_response": response.text,
                },
                logger_fn=logger_fn,
            )
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
            if "error" in completion_response:
                raise AnthropicError(
                    message=completion_response["error"],
                    status_code=response.status_code,
                )
            else:
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response["completion"]

            ## CALCULATING USAGE
            prompt_tokens = len(
                self.encoding.encode(prompt)
            )  ##[TODO] use the anthropic tokenizer here
            completion_tokens = len(
                self.encoding.encode(model_response["choices"][0]["message"]["content"])
            )  ##[TODO] use the anthropic tokenizer here

            model_response["created"] = time.time()
            model_response["model"] = model
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            return model_response
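Note on the reformatted handler above: AnthropicLLM.completion flattens the OpenAI-style messages list into a single prompt string by alternating the Human/Assistant markers and leaving an open Assistant turn at the end. A minimal standalone sketch of that transformation (illustration only; the helper name build_anthropic_prompt is not part of the diff):

HUMAN_PROMPT = "\n\nHuman:"
AI_PROMPT = "\n\nAssistant:"

def build_anthropic_prompt(messages):
    # mirror AnthropicLLM.completion: user (or role-less) messages get the
    # Human marker, everything else gets the Assistant marker
    prompt = HUMAN_PROMPT
    for message in messages:
        if "role" in message and message["role"] != "user":
            prompt += f"{AI_PROMPT}{message['content']}"
        else:
            prompt += f"{HUMAN_PROMPT}{message['content']}"
    return prompt + AI_PROMPT  # the model completes the final Assistant turn

print(build_anthropic_prompt([{"role": "user", "content": "Hello"}]))
# -> "\n\nHuman:\n\nHuman:Hello\n\nAssistant:" (the doubled marker mirrors the handler)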
@@ -1,6 +1,7 @@
## This is a template base class to be used for adding new LLM providers via API calls


class BaseLLM:
    def validate_environment():  # set up the environment required to run the model
        pass
@@ -7,18 +7,24 @@ import time
from typing import Callable
from litellm.utils import ModelResponse


class HuggingfaceError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class HuggingfaceRestAPILLM:
    def __init__(self, encoding, api_key=None) -> None:
        self.encoding = encoding
        self.validate_environment(api_key=api_key)

    def validate_environment(
        self, api_key
    ):  # set up the environment required to run the model
        self.headers = {
            "content-type": "application/json",
        }
@@ -27,7 +33,17 @@ class HuggingfaceRestAPILLM():
        if self.api_key != None:
            self.headers["Authorization"] = f"Bearer {self.api_key}"

    def completion(
        self,
        model: str,
        messages: list,
        custom_api_base: str,
        model_response: ModelResponse,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        if custom_api_base:
            completion_url = custom_api_base
        elif "HF_API_BASE" in os.environ:
@@ -35,7 +51,9 @@ class HuggingfaceRestAPILLM():
        else:
            completion_url = f"https://api-inference.huggingface.co/models/{model}"
        prompt = ""
        if (
            "meta-llama" in model and "chat" in model
        ):  # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            prompt = "<s>"
            for message in messages:
                if message["role"] == "system":
@@ -57,14 +75,33 @@ class HuggingfaceRestAPILLM():
            # "parameters": optional_params
        }
        ## LOGGING
        logging(
            model=model,
            input=prompt,
            additional_args={
                "litellm_params": litellm_params,
                "optional_params": optional_params,
            },
            logger_fn=logger_fn,
        )
        ## COMPLETION CALL
        response = requests.post(
            completion_url, headers=self.headers, data=json.dumps(data)
        )
        if "stream" in optional_params and optional_params["stream"] == True:
            return response.iter_lines()
        else:
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                additional_args={
                    "litellm_params": litellm_params,
                    "optional_params": optional_params,
                    "original_response": response.text,
                },
                logger_fn=logger_fn,
            )
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
@@ -72,21 +109,29 @@ class HuggingfaceRestAPILLM():
            if isinstance(completion_response, dict) and "error" in completion_response:
                print_verbose(f"completion error: {completion_response['error']}")
                print_verbose(f"response.status_code: {response.status_code}")
                raise HuggingfaceError(
                    message=completion_response["error"],
                    status_code=response.status_code,
                )
            else:
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response[0]["generated_text"]

            ## CALCULATING USAGE
            prompt_tokens = len(
                self.encoding.encode(prompt)
            )  ##[TODO] use the llama2 tokenizer here
            completion_tokens = len(
                self.encoding.encode(model_response["choices"][0]["message"]["content"])
            )  ##[TODO] use the llama2 tokenizer here

            model_response["created"] = time.time()
            model_response["model"] = model
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            return model_response
        pass
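For orientation, a rough sketch of how this Hugging Face handler gets driven (illustration only, not part of the commit; the model id is a placeholder and an HF token is assumed in the environment):

import os
import tiktoken
from litellm.utils import ModelResponse
from litellm.llms.huggingface_restapi import HuggingfaceRestAPILLM

encoding = tiktoken.get_encoding("cl100k_base")
hf_client = HuggingfaceRestAPILLM(encoding=encoding, api_key=os.environ.get("HF_TOKEN"))
response = hf_client.completion(
    model="meta-llama/Llama-2-7b-chat-hf",  # placeholder model id
    messages=[{"role": "user", "content": "Hello"}],
    custom_api_base=None,  # fall back to the public inference API
    model_response=ModelResponse(),
    print_verbose=print,
    optional_params={},
    litellm_params={},
    logger_fn=None,
)
print(response["choices"][0]["message"]["content"])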
562 litellm/main.py
@@ -4,17 +4,43 @@ from functools import partial
import dotenv, traceback, random, asyncio, time
from copy import deepcopy
import litellm
from litellm import (
    client,
    logging,
    exception_type,
    timeout,
    get_optional_params,
    get_litellm_params,
)
from litellm.utils import (
    get_secret,
    install_and_import,
    CustomStreamWrapper,
    read_config_args,
)
from .llms.anthropic import AnthropicLLM
from .llms.huggingface_restapi import HuggingfaceRestAPILLM
import tiktoken
from concurrent.futures import ThreadPoolExecutor

encoding = tiktoken.get_encoding("cl100k_base")
from litellm.utils import (
    get_secret,
    install_and_import,
    CustomStreamWrapper,
    ModelResponse,
    read_config_args,
)
from litellm.utils import (
    get_ollama_response_stream,
    stream_to_string,
    together_ai_completion_streaming,
)

####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv()  # Loading env variables using dotenv


####### COMPLETION ENDPOINTS ################
#############################################
async def acompletion(*args, **kwargs):
@@ -26,20 +52,43 @@ async def acompletion(*args, **kwargs):
    # Call the synchronous function using run_in_executor
    return await loop.run_in_executor(None, func)


@client
# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry))  # retry call, turn this off by setting `litellm.retry = False`
@timeout(
    600
)  ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
def completion(
    model,
    messages,  # required params
    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
    functions=[],
    function_call="",  # optional params
    temperature=1,
    top_p=1,
    n=1,
    stream=False,
    stop=None,
    max_tokens=float("inf"),
    presence_penalty=0,
    frequency_penalty=0,
    logit_bias={},
    user="",
    deployment_id=None,
    # Optional liteLLM function params
    *,
    return_async=False,
    api_key=None,
    force_timeout=600,
    logger_fn=None,
    verbose=False,
    azure=False,
    custom_llm_provider=None,
    custom_api_base=None,
    # model specific optional params
    # used by text-bison only
    top_k=40,
    request_timeout=0,  # unused var for old version of OpenAI API
) -> ModelResponse:
    try:
        model_response = ModelResponse()
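Given the widened signature above, a brief usage sketch (illustrative; assumes the matching provider key, e.g. OPENAI_API_KEY, is exported):

import asyncio
from litellm import completion, acompletion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# synchronous call - provider routing is inferred from the model name
response = completion(model="gpt-3.5-turbo", messages=messages)
print(response["choices"][0]["message"]["content"])

# acompletion wraps the same call in run_in_executor for async callers
async_response = asyncio.run(acompletion(model="gpt-3.5-turbo", messages=messages))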
@@ -48,27 +97,58 @@ def completion(
        args = locals()
        # check if user passed in any of the OpenAI optional params
        optional_params = get_optional_params(
            functions=functions,
            function_call=function_call,
            temperature=temperature,
            top_p=top_p,
            n=n,
            stream=stream,
            stop=stop,
            max_tokens=max_tokens,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            logit_bias=logit_bias,
            user=user,
            deployment_id=deployment_id,
            # params to identify the model
            model=model,
            custom_llm_provider=custom_llm_provider,
            top_k=top_k,
        )
        # For logging - save the values of the litellm-specific params passed in
        litellm_params = get_litellm_params(
            return_async=return_async,
            api_key=api_key,
            force_timeout=force_timeout,
            logger_fn=logger_fn,
            verbose=verbose,
            custom_llm_provider=custom_llm_provider,
            custom_api_base=custom_api_base,
        )

        if custom_llm_provider == "azure":
            # azure configs
            openai.api_type = "azure"
            openai.api_base = (
                litellm.api_base
                if litellm.api_base is not None
                else get_secret("AZURE_API_BASE")
            )
            openai.api_version = (
                litellm.api_version
                if litellm.api_version is not None
                else get_secret("AZURE_API_VERSION")
            )
            # set key
            openai.api_key = api_key or litellm.azure_key or get_secret("AZURE_API_KEY")
            ## LOGGING
            logging(
                model=model,
                input=messages,
                additional_args=optional_params,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.ChatCompletion.create(
@@ -79,47 +159,70 @@ def completion(
                )
            else:
                response = openai.ChatCompletion.create(
                    model=model, messages=messages, **optional_params
                )
        elif (
            model in litellm.open_ai_chat_completion_models
            or custom_llm_provider == "custom_openai"
        ):  # allow user to make an openai call with a custom base
            openai.api_type = "openai"
            # note: if a user sets a custom base - we should ensure this works
            api_base = (
                custom_api_base if custom_api_base is not None else litellm.api_base
            )  # allow for the setting of dynamic and stateful api-bases
            openai.api_base = (
                api_base if api_base is not None else "https://api.openai.com/v1"
            )
            openai.api_version = None
            if litellm.organization:
                openai.organization = litellm.organization
            # set API KEY
            openai.api_key = (
                api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
            )

            ## LOGGING
            logging(
                model=model,
                input=messages,
                additional_args=args,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.ChatCompletion.create(
                    model=model,
                    messages=messages,
                    headers=litellm.headers,
                    **optional_params,
                )
            else:
                response = openai.ChatCompletion.create(
                    model=model, messages=messages, **optional_params
                )
        elif model in litellm.open_ai_text_completion_models:
            openai.api_type = "openai"
            openai.api_base = (
                litellm.api_base
                if litellm.api_base is not None
                else "https://api.openai.com/v1"
            )
            openai.api_version = None
            openai.api_key = (
                api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
            )
            if litellm.organization:
                openai.organization = litellm.organization
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                additional_args=optional_params,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.Completion.create(
@@ -128,13 +231,19 @@ def completion(
                    headers=litellm.headers,
                )
            else:
                response = openai.Completion.create(model=model, prompt=prompt)
            completion_response = response["choices"][0]["text"]
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )
            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
            model_response["created"] = response["created"]
@@ -145,11 +254,17 @@ def completion(
            # import replicate/if it fails then pip install replicate
            install_and_import("replicate")
            import replicate

            # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
            replicate_key = os.environ.get("REPLICATE_API_TOKEN")
            if replicate_key == None:
                # user did not set REPLICATE_API_TOKEN in .env
                replicate_key = (
                    get_secret("REPLICATE_API_KEY")
                    or get_secret("REPLICATE_API_TOKEN")
                    or api_key
                    or litellm.replicate_key
                )
            # set replicate kye
            os.environ["REPLICATE_API_TOKEN"] = replicate_key
            prompt = " ".join([message["content"] for message in messages])
@@ -158,12 +273,16 @@ def completion(
            input["max_length"] = max_tokens  # for t5 models
            input["max_new_tokens"] = max_tokens  # for llama2 models
            ## LOGGING
            logging(
                model=model,
                input=input,
                custom_llm_provider=custom_llm_provider,
                additional_args={"max_tokens": max_tokens},
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            output = replicate.run(model, input=input)
            if "stream" in optional_params and optional_params["stream"] == True:
                # don't try to access stream object,
                # let the stream handler know this is replicate
                response = CustomStreamWrapper(output, "replicate")
@@ -173,7 +292,16 @@ def completion(
                response += item
            completion_response = response
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )
            prompt_tokens = len(encoding.encode(prompt))
            completion_tokens = len(encoding.encode(completion_response))
            ## RESPONSE OBJECT
|
||||||
model_response["usage"] = {
|
model_response["usage"] = {
|
||||||
"prompt_tokens": prompt_tokens,
|
"prompt_tokens": prompt_tokens,
|
||||||
"completion_tokens": completion_tokens,
|
"completion_tokens": completion_tokens,
|
||||||
"total_tokens": prompt_tokens + completion_tokens
|
"total_tokens": prompt_tokens + completion_tokens,
|
||||||
}
|
}
|
||||||
response = model_response
|
response = model_response
|
||||||
elif model in litellm.anthropic_models:
|
elif model in litellm.anthropic_models:
|
||||||
anthropic_key = api_key or litellm.anthropic_key or os.environ.get("ANTHROPIC_API_KEY")
|
anthropic_key = (
|
||||||
anthropic_client = AnthropicLLM(encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
|
api_key or litellm.anthropic_key or os.environ.get("ANTHROPIC_API_KEY")
|
||||||
model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
|
)
|
||||||
if 'stream' in optional_params and optional_params['stream'] == True:
|
anthropic_client = AnthropicLLM(
|
||||||
|
encoding=encoding,
|
||||||
|
default_max_tokens_to_sample=litellm.max_tokens,
|
||||||
|
api_key=anthropic_key,
|
||||||
|
)
|
||||||
|
model_response = anthropic_client.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
model_response=model_response,
|
||||||
|
print_verbose=print_verbose,
|
||||||
|
optional_params=optional_params,
|
||||||
|
litellm_params=litellm_params,
|
||||||
|
logger_fn=logger_fn,
|
||||||
|
)
|
||||||
|
if "stream" in optional_params and optional_params["stream"] == True:
|
||||||
# don't try to access stream object,
|
# don't try to access stream object,
|
||||||
response = CustomStreamWrapper(model_response, model)
|
response = CustomStreamWrapper(model_response, model)
|
||||||
return response
|
return response
|
||||||
|
@@ -198,7 +340,11 @@ def completion(
        elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
            openai.api_type = "openai"
            # not sure if this will work after someone first uses another API
            openai.api_base = (
                litellm.api_base
                if litellm.api_base is not None
                else "https://openrouter.ai/api/v1"
            )
            openai.api_version = None
            if litellm.organization:
                openai.organization = litellm.organization
@@ -207,16 +353,24 @@ def completion(
            elif litellm.openrouter_key:
                openai.api_key = litellm.openrouter_key
            else:
                openai.api_key = get_secret("OPENROUTER_API_KEY") or get_secret(
                    "OR_API_KEY"
                )
            ## LOGGING
            logging(
                model=model,
                input=messages,
                additional_args=optional_params,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.ChatCompletion.create(
                    model=model,
                    messages=messages,
                    headers=litellm.headers,
                    **optional_params,
                )
            else:
                openrouter_site_url = get_secret("OR_SITE_URL")
@@ -230,36 +384,51 @@ def completion(
                response = openai.ChatCompletion.create(
                    model=model,
                    messages=messages,
                    headers={
                        "HTTP-Referer": openrouter_site_url,  # To identify your site
                        "X-Title": openrouter_app_name,  # To identify your app
                    },
                    **optional_params,
                )
        elif model in litellm.cohere_models:
            # import cohere/if it fails then pip install cohere
            install_and_import("cohere")
            import cohere

            cohere_key = (
                api_key
                or litellm.cohere_key
                or get_secret("COHERE_API_KEY")
                or get_secret("CO_API_KEY")
            )
            co = cohere.Client(cohere_key)
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            ## COMPLETION CALL
            response = co.generate(model=model, prompt=prompt, **optional_params)
            if "stream" in optional_params and optional_params["stream"] == True:
                # don't try to access stream object,
                response = CustomStreamWrapper(response, model)
                return response

            completion_response = response[0].text
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )
            prompt_tokens = len(encoding.encode(prompt))
            completion_tokens = len(encoding.encode(completion_response))
            ## RESPONSE OBJECT
@@ -269,52 +438,100 @@ def completion(
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            response = model_response
        elif (
            model in litellm.huggingface_models or custom_llm_provider == "huggingface"
        ):
            custom_llm_provider = "huggingface"
            huggingface_key = (
                api_key
                or litellm.huggingface_key
                or os.environ.get("HF_TOKEN")
                or os.environ.get("HUGGINGFACE_API_KEY")
            )
            huggingface_client = HuggingfaceRestAPILLM(
                encoding=encoding, api_key=huggingface_key
            )
            model_response = huggingface_client.completion(
                model=model,
                messages=messages,
                custom_api_base=custom_api_base,
                model_response=model_response,
                print_verbose=print_verbose,
                optional_params=optional_params,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
            )
            if "stream" in optional_params and optional_params["stream"] == True:
                # don't try to access stream object,
                response = CustomStreamWrapper(
                    model_response, model, custom_llm_provider="huggingface"
                )
                return response
            response = model_response
        elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
            import requests

            TOGETHER_AI_TOKEN = (
                get_secret("TOGETHER_AI_TOKEN")
                or get_secret("TOGETHERAI_API_KEY")
                or api_key
                or litellm.togetherai_api_key
            )
            headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
            endpoint = "https://api.together.xyz/inference"
            prompt = " ".join(
                [message["content"] for message in messages]
            )  # TODO: Add chat support for together AI

            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            if stream == True:
                return together_ai_completion_streaming(
                    {
                        "model": model,
                        "prompt": prompt,
                        "request_type": "language-model-inference",
                        **optional_params,
                    },
                    headers=headers,
                )
            res = requests.post(
                endpoint,
                json={
                    "model": model,
                    "prompt": prompt,
                    "request_type": "language-model-inference",
                    **optional_params,
                },
                headers=headers,
            )
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": res.text,
                },
                logger_fn=logger_fn,
            )

            # make this safe for reading, if output does not exist raise an error
            json_response = res.json()
            if "output" not in json_response:
                raise Exception(
                    f"liteLLM: Error Making TogetherAI request, JSON Response {json_response}"
                )
            completion_response = json_response["output"]["choices"][0]["text"]
            prompt_tokens = len(encoding.encode(prompt))
            completion_tokens = len(encoding.encode(completion_response))
            ## RESPONSE OBJECT
@@ -324,7 +541,7 @@ def completion(
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            response = model_response
        elif model in litellm.vertex_chat_models:
@@ -332,21 +549,41 @@ def completion(
            install_and_import("vertexai")
            import vertexai
            from vertexai.preview.language_models import ChatModel, InputOutputTextPair

            vertexai.init(
                project=litellm.vertex_project, location=litellm.vertex_location
            )
            # vertexai does not use an API key, it looks for credentials.json in the environment

            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "litellm_params": litellm_params,
                    "optional_params": optional_params,
                },
                logger_fn=logger_fn,
            )

            chat_model = ChatModel.from_pretrained(model)

            chat = chat_model.start_chat()
            completion_response = chat.send_message(prompt, **optional_params)

            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@@ -358,17 +595,33 @@ def completion(
            import vertexai
            from vertexai.language_models import TextGenerationModel

            vertexai.init(
                project=litellm.vertex_project, location=litellm.vertex_location
            )
            # vertexai does not use an API key, it looks for credentials.json in the environment

            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            vertex_model = TextGenerationModel.from_pretrained(model)
            completion_response = vertex_model.predict(prompt, **optional_params)

            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@@ -378,20 +631,35 @@ def completion(
        elif model in litellm.ai21_models:
            install_and_import("ai21")
            import ai21

            ai21.api_key = get_secret("AI21_API_KEY")

            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )

            ai21_response = ai21.Completion.execute(
                model=model,
                prompt=prompt,
            )
            completion_response = ai21_response["completions"][0]["data"]["text"]

            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@@ -399,7 +667,9 @@ def completion(
            model_response["model"] = model
            response = model_response
        elif custom_llm_provider == "ollama":
            endpoint = (
                litellm.api_base if litellm.api_base is not None else custom_api_base
            )
            prompt = " ".join([message["content"] for message in messages])

            ## LOGGING
@@ -407,14 +677,23 @@ def completion(
            generator = get_ollama_response_stream(endpoint, model, prompt)
            # assume all responses are streamed
            return generator
        elif (
            custom_llm_provider == "baseten"
            or litellm.api_base == "https://app.baseten.co"
        ):
            import baseten

            base_ten_key = get_secret("BASETEN_API_KEY")
            baseten.login(base_ten_key)

            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )

            base_ten__model = baseten.deployed_model_version_id(model)

@@ -424,7 +703,16 @@ def completion(
            if type(completion_response) == dict:
                completion_response = completion_response["generated_text"]

            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": completion_response,
                },
                logger_fn=logger_fn,
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@@ -432,16 +720,35 @@ def completion(
            model_response["model"] = model
            response = model_response

        elif custom_llm_provider == "petals" or (
            litellm.api_base and "chat.petals.dev" in litellm.api_base
        ):
            url = "https://chat.petals.dev/api/v1/generate"
            import requests

            prompt = " ".join([message["content"] for message in messages])

            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            response = requests.post(
                url, data={"inputs": prompt, "max_new_tokens": 100, "model": model}
            )
            ## LOGGING
            logging(
                model=model,
                input=prompt,
                custom_llm_provider=custom_llm_provider,
                additional_args={
                    "max_tokens": max_tokens,
                    "original_response": response,
                },
                logger_fn=logger_fn,
            )
            completion_response = response.json()["outputs"]

            # RESPONSE OBJECT
@@ -451,15 +758,32 @@ def completion(
            response = model_response
        else:
            ## LOGGING
            logging(
                model=model,
                input=messages,
                custom_llm_provider=custom_llm_provider,
                logger_fn=logger_fn,
            )
            args = locals()
            raise ValueError(
                f"Unable to map your input to a model. Check your input - {args}"
            )
        return response
    except Exception as e:
        ## LOGGING
        logging(
            model=model,
            input=messages,
            custom_llm_provider=custom_llm_provider,
            additional_args={"max_tokens": max_tokens},
            logger_fn=logger_fn,
            exception=e,
        )
        ## Map to OpenAI Exception
        raise exception_type(
            model=model, custom_llm_provider=custom_llm_provider, original_exception=e
        )


def batch_completion(*args, **kwargs):
    batch_messages = args[1] if len(args) > 1 else kwargs.get("messages")
@ -480,9 +804,12 @@ def batch_completion(*args, **kwargs):
|
||||||
results = [future.result() for future in completions]
|
results = [future.result() for future in completions]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
### EMBEDDING ENDPOINTS ####################
|
### EMBEDDING ENDPOINTS ####################
|
||||||
@client
|
@client
|
||||||
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
|
@timeout(
|
||||||
|
60
|
||||||
|
) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
|
||||||
def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
||||||
try:
|
try:
|
||||||
response = None
|
response = None
|
||||||
|
@ -519,6 +846,8 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
||||||
## Map to OpenAI Exception
|
## Map to OpenAI Exception
|
||||||
raise exception_type(model=model, original_exception=e)
|
raise exception_type(model=model, original_exception=e)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
####### HELPER FUNCTIONS ################
|
####### HELPER FUNCTIONS ################
|
||||||
## Set verbose to true -> ```litellm.set_verbose = True```
|
## Set verbose to true -> ```litellm.set_verbose = True```
|
||||||
def print_verbose(print_statement):
|
def print_verbose(print_statement):
|
||||||
|
@ -527,10 +856,13 @@ def print_verbose(print_statement):
|
||||||
if random.random() <= 0.3:
|
if random.random() <= 0.3:
|
||||||
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
||||||
|
|
||||||
|
|
||||||
def config_completion(**kwargs):
|
def config_completion(**kwargs):
|
||||||
if litellm.config_path != None:
|
if litellm.config_path != None:
|
||||||
config_args = read_config_args(litellm.config_path)
|
config_args = read_config_args(litellm.config_path)
|
||||||
# overwrite any args passed in with config args
|
# overwrite any args passed in with config args
|
||||||
return completion(**kwargs, **config_args)
|
return completion(**kwargs, **config_args)
|
||||||
else:
|
else:
|
||||||
raise ValueError("No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`")
|
raise ValueError(
|
||||||
|
"No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
|
||||||
|
)
|
||||||
|
|
|
@@ -3,9 +3,12 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 import traceback


 def testing_batch_completion(*args, **kwargs):
 try:
-batch_models = args[0] if len(args) > 0 else kwargs.pop("models") ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
+batch_models = (
+args[0] if len(args) > 0 else kwargs.pop("models")
+) ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
 batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
 results = []
 completions = []
@@ -18,16 +21,32 @@ def testing_batch_completion(*args, **kwargs):
 if len(args) > 0:
 args_modified[0] = model["model"]
 else:
-kwargs_modified["model"] = model["model"] if isinstance(model, dict) and "model" in model else model # if model is a dictionary get it's value else assume it's a string
-kwargs_modified["custom_llm_provider"] = model["custom_llm_provider"] if isinstance(model, dict) and "custom_llm_provider" in model else None
-kwargs_modified["custom_api_base"] = model["custom_api_base"] if isinstance(model, dict) and "custom_api_base" in model else None
+kwargs_modified["model"] = (
+model["model"]
+if isinstance(model, dict) and "model" in model
+else model
+) # if model is a dictionary get it's value else assume it's a string
+kwargs_modified["custom_llm_provider"] = (
+model["custom_llm_provider"]
+if isinstance(model, dict) and "custom_llm_provider" in model
+else None
+)
+kwargs_modified["custom_api_base"] = (
+model["custom_api_base"]
+if isinstance(model, dict) and "custom_api_base" in model
+else None
+)
 for message_list in batch_messages:
 if len(args) > 1:
 args_modified[1] = message_list
-future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+future = executor.submit(
+litellm.completion, *args_modified, **kwargs_modified
+)
 else:
 kwargs_modified["messages"] = message_list
-future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+future = executor.submit(
+litellm.completion, *args_modified, **kwargs_modified
+)
 completions.append((future, message_list))

 # Retrieve the results and calculate elapsed time for each completion call
@@ -38,17 +57,27 @@ def testing_batch_completion(*args, **kwargs):
 result = future.result()
 end_time = time.time()
 elapsed_time = end_time - start_time
-result_dict = {"status": "succeeded", "response": future.result(), "prompt": message_list, "response_time": elapsed_time}
+result_dict = {
+"status": "succeeded",
+"response": future.result(),
+"prompt": message_list,
+"response_time": elapsed_time,
+}
 results.append(result_dict)
 except Exception as e:
 end_time = time.time()
 elapsed_time = end_time - start_time
-result_dict = {"status": "failed", "response": e, "response_time": elapsed_time}
+result_dict = {
+"status": "failed",
+"response": e,
+"response_time": elapsed_time,
+}
 results.append(result_dict)
 return results
 except:
 traceback.print_exc()


 def duration_test_model(original_function):
 def wrapper_function(*args, **kwargs):
 # Code to be executed before the original function
@@ -70,22 +99,39 @@ def duration_test_model(original_function):
 # Return the wrapper function
 return wrapper_function


 @duration_test_model
 def load_test_model(models: list, prompt: str = None, num_calls: int = None):
 test_calls = 100
 if num_calls:
 test_calls = num_calls
 input_prompt = prompt if prompt else "Hey, how's it going?"
-messages = [{"role": "user", "content": prompt}] if prompt else [{"role": "user", "content": input_prompt}]
-full_message_list = [messages for _ in range(test_calls)] # call it as many times as set by user to load test models
+messages = (
+[{"role": "user", "content": prompt}]
+if prompt
+else [{"role": "user", "content": input_prompt}]
+)
+full_message_list = [
+messages for _ in range(test_calls)
+] # call it as many times as set by user to load test models
 start_time = time.time()
 try:
 results = testing_batch_completion(models=models, messages=full_message_list)
 end_time = time.time()
 response_time = end_time - start_time
-return {"total_response_time": response_time, "calls_made": test_calls, "prompt": input_prompt, "results": results}
+return {
+"total_response_time": response_time,
+"calls_made": test_calls,
+"prompt": input_prompt,
+"results": results,
+}
 except Exception as e:
 traceback.print_exc()
 end_time = time.time()
 response_time = end_time - start_time
-return {"total_response_time": response_time, "calls_made": test_calls, "prompt": input_prompt, "exception": e}
+return {
+"total_response_time": response_time,
+"calls_made": test_calls,
+"prompt": input_prompt,
+"exception": e,
+}
@@ -3,15 +3,20 @@

 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion

 litellm.set_verbose = False


 def logger_fn(model_call_object: dict):
 print(f"model call details: {model_call_object}")


 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]

@@ -20,7 +25,12 @@ temp_key = os.environ.get("ANTHROPIC_API_KEY")
 os.environ["ANTHROPIC_API_KEY"] = "bad-key"
 # test on openai completion call
 try:
-response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn, api_key=temp_key)
+response = completion(
+model="claude-instant-1",
+messages=messages,
+logger_fn=logger_fn,
+api_key=temp_key,
+)
 print(f"response: {response}")
 except:
 print(f"error occurred: {traceback.format_exc()}")
@@ -33,7 +43,9 @@ litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
 os.environ.pop("ANTHROPIC_API_KEY")
 # test on openai completion call
 try:
-response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="claude-instant-1", messages=messages, logger_fn=logger_fn
+)
 print(f"response: {response}")
 except:
 print(f"error occurred: {traceback.format_exc()}")
@@ -5,9 +5,13 @@ import sys, os
 import pytest
 import traceback
 import asyncio
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 from litellm import acompletion


 async def test_get_response():
 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]
@@ -17,5 +21,6 @@ async def test_get_response():
 pytest.fail(f"error occurred: {e}")
 return response


 response = asyncio.run(test_get_response())
 print(response)
@@ -5,12 +5,13 @@
 import sys, os
 import traceback
 from dotenv import load_dotenv

 load_dotenv()
 # Get the current directory of the script
 current_dir = os.path.dirname(os.path.abspath(__file__))

 # Get the parent directory by joining the current directory with '..'
-parent_dir = os.path.join(current_dir, '../..')
+parent_dir = os.path.join(current_dir, "../..")

 # Add the parent directory to the system path
 sys.path.append(parent_dir)

@@ -3,7 +3,10 @@

 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import batch_completion
@@ -1,9 +1,13 @@
 import sys, os
 import traceback
 from dotenv import load_dotenv

 load_dotenv()
 import os
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import pytest
 import litellm
 from litellm import embedding, completion
@@ -12,7 +16,6 @@ litellm.caching = True
 messages = [{"role": "user", "content": "who is ishaan Github? "}]


 # test if response cached
 def test_caching():
 try:
@@ -29,7 +32,3 @@ def test_caching():
 litellm.caching = False
 print(f"error occurred: {traceback.format_exc()}")
 pytest.fail(f"Error occurred: {e}")
@@ -5,7 +5,9 @@ import sys, os
 import traceback
 import pytest

-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion

@@ -14,17 +16,22 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]

 litellm.set_verbose = True


 def logger_fn(model_call_object: dict):
 # print(f"model call details: {model_call_object}")
 pass


 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]


 def test_completion_openai():
 try:
 print("running query")
-response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn
+)
 print(f"response: {response}")
 # Add any assertions here to check the response
 except Exception as e:
@@ -34,33 +41,46 @@ def test_completion_openai():

 def test_completion_claude():
 try:
-response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="claude-instant-1", messages=messages, logger_fn=logger_fn
+)
 # Add any assertions here to check the response
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_non_openai():
 try:
-response = completion(model="command-nightly", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="command-nightly", messages=messages, logger_fn=logger_fn
+)
 # Add any assertions here to check the response
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_embedding_openai():
 try:
-response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
+response = embedding(
+model="text-embedding-ada-002", input=[user_message], logger_fn=logger_fn
+)
 # Add any assertions here to check the response
 print(f"response: {str(response)[:50]}")
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_bad_azure_embedding():
 try:
-response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
+response = embedding(
+model="chatgpt-test", input=[user_message], logger_fn=logger_fn
+)
 # Add any assertions here to check the response
 print(f"response: {str(response)[:50]}")
 except Exception as e:
 pass


 # def test_good_azure_embedding():
 # try:
 # response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
@@ -68,4 +88,3 @@ def test_bad_azure_embedding():
 # print(f"response: {str(response)[:50]}")
 # except Exception as e:
 # pytest.fail(f"Error occurred: {e}")
@@ -1,12 +1,17 @@
 import sys, os
 import traceback
 from dotenv import load_dotenv

 load_dotenv()
 import os
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import pytest
 import litellm
 from litellm import embedding, completion

 # from infisical import InfisicalClient

 # litellm.set_verbose = True
@@ -15,30 +20,39 @@ from litellm import embedding, completion
 user_message = "Hello, whats the weather in San Francisco??"
 messages = [{"content": user_message, "role": "user"}]


 def logger_fn(user_model_dict):
 print(f"user_model_dict: {user_model_dict}")


 def test_completion_claude():
 try:
-response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="claude-instant-1", messages=messages, logger_fn=logger_fn
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_claude_stream():
 try:
 messages = [
 {"role": "system", "content": "You are a helpful assistant."},
-{"role": "user", "content": "how does a court case get to the Supreme Court?"}
+{
+"role": "user",
+"content": "how does a court case get to the Supreme Court?",
+},
 ]
 response = completion(model="claude-2", messages=messages, stream=True)
 # Add any assertions here to check the response
 for chunk in response:
-print(chunk['choices'][0]['delta']) # same as openai format
+print(chunk["choices"][0]["delta"]) # same as openai format
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 # def test_completion_hf_api():
 # try:
 # user_message = "write some code to find the sum of two numbers"
@@ -62,10 +76,12 @@ def test_completion_claude_stream():

 def test_completion_cohere():
 try:
-response = completion(model="command-nightly", messages=messages, max_tokens=100)
+response = completion(
+model="command-nightly", messages=messages, max_tokens=100
+)
 # Add any assertions here to check the response
 print(response)
-response_str = response['choices'][0]['message']['content']
+response_str = response["choices"][0]["message"]["content"]
 print(f"str response{response_str}")
 response_str_2 = response.choices[0].message.content
 if type(response_str) != str:
@@ -75,24 +91,31 @@ def test_completion_cohere():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_cohere_stream():
 try:
 messages = [
 {"role": "system", "content": "You are a helpful assistant."},
-{"role": "user", "content": "how does a court case get to the Supreme Court?"}
+{
+"role": "user",
+"content": "how does a court case get to the Supreme Court?",
+},
 ]
-response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
+response = completion(
+model="command-nightly", messages=messages, stream=True, max_tokens=50
+)
 # Add any assertions here to check the response
 for chunk in response:
-print(chunk['choices'][0]['delta']) # same as openai format
+print(chunk["choices"][0]["delta"]) # same as openai format
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_openai():
 try:
 response = completion(model="gpt-3.5-turbo", messages=messages)

-response_str = response['choices'][0]['message']['content']
+response_str = response["choices"][0]["message"]["content"]
 response_str_2 = response.choices[0].message.content
 assert response_str == response_str_2
 assert type(response_str) == str
@@ -100,6 +123,7 @@ def test_completion_openai():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_text_openai():
 try:
 response = completion(model="text-davinci-003", messages=messages)
@@ -108,17 +132,31 @@ def test_completion_text_openai():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_openai_with_optional_params():
 try:
-response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
+response = completion(
+model="gpt-3.5-turbo",
+messages=messages,
+temperature=0.5,
+top_p=0.1,
+user="ishaan_dev@berri.ai",
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_openrouter():
 try:
-response = completion(model="google/palm-2-chat-bison", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
+response = completion(
+model="google/palm-2-chat-bison",
+messages=messages,
+temperature=0.5,
+top_p=0.1,
+user="ishaan_dev@berri.ai",
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
@@ -127,12 +165,23 @@ def test_completion_openrouter():

 def test_completion_openai_with_more_optional_params():
 try:
-response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, frequency_penalty=-0.5, logit_bias={123: 5}, user="ishaan_dev@berri.ai")
+response = completion(
+model="gpt-3.5-turbo",
+messages=messages,
+temperature=0.5,
+top_p=0.1,
+n=2,
+max_tokens=150,
+presence_penalty=0.5,
+frequency_penalty=-0.5,
+logit_bias={123: 5},
+user="ishaan_dev@berri.ai",
+)
 # Add any assertions here to check the response
 print(response)
-response_str = response['choices'][0]['message']['content']
+response_str = response["choices"][0]["message"]["content"]
 response_str_2 = response.choices[0].message.content
-print(response['choices'][0]['message']['content'])
+print(response["choices"][0]["message"]["content"])
 print(response.choices[0].message.content)
 if type(response_str) != str:
 pytest.fail(f"Error occurred: {e}")
@@ -141,14 +190,28 @@ def test_completion_openai_with_more_optional_params():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_openai_with_stream():
 try:
-response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, stream=True, frequency_penalty=-0.5, logit_bias={27000: 5}, user="ishaan_dev@berri.ai")
+response = completion(
+model="gpt-3.5-turbo",
+messages=messages,
+temperature=0.5,
+top_p=0.1,
+n=2,
+max_tokens=150,
+presence_penalty=0.5,
+stream=True,
+frequency_penalty=-0.5,
+logit_bias={27000: 5},
+user="ishaan_dev@berri.ai",
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_openai_with_functions():
 function1 = [
 {
@@ -159,32 +222,38 @@ def test_completion_openai_with_functions():
 "properties": {
 "location": {
 "type": "string",
-"description": "The city and state, e.g. San Francisco, CA"
+"description": "The city and state, e.g. San Francisco, CA",
 },
-"unit": {
-"type": "string",
-"enum": ["celsius", "fahrenheit"]
-}
+"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+},
+"required": ["location"],
 },
-"required": ["location"]
-}
 }
 ]
 try:
-response = completion(model="gpt-3.5-turbo", messages=messages, functions=function1)
+response = completion(
+model="gpt-3.5-turbo", messages=messages, functions=function1
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_azure():
 try:
-response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
+response = completion(
+model="gpt-3.5-turbo",
+deployment_id="chatgpt-test",
+messages=messages,
+custom_llm_provider="azure",
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
 def test_completion_replicate_llama_stream():
 model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
@@ -197,23 +266,32 @@ def test_completion_replicate_llama_stream():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_replicate_stability_stream():
 model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 try:
-response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider="replicate")
+response = completion(
+model=model_name,
+messages=messages,
+stream=True,
+custom_llm_provider="replicate",
+)
 # Add any assertions here to check the response
 for chunk in response:
-print(chunk['choices'][0]['delta'])
+print(chunk["choices"][0]["delta"])
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_completion_replicate_stability():
 model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 try:
-response = completion(model=model_name, messages=messages, custom_llm_provider="replicate")
+response = completion(
+model=model_name, messages=messages, custom_llm_provider="replicate"
+)
 # Add any assertions here to check the response
-response_str = response['choices'][0]['message']['content']
+response_str = response["choices"][0]["message"]["content"]
 response_str_2 = response.choices[0].message.content
 print(response_str)
 print(response_str_2)
@@ -224,6 +302,7 @@ def test_completion_replicate_stability():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 ######## Test TogetherAI ########
 def test_completion_together_ai():
 model_name = "togethercomputer/llama-2-70b-chat"
@@ -234,15 +313,22 @@ def test_completion_together_ai():
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 def test_petals():
 model_name = "stabilityai/StableBeluga2"
 try:
-response = completion(model=model_name, messages=messages, custom_llm_provider="petals", force_timeout=120)
+response = completion(
+model=model_name,
+messages=messages,
+custom_llm_provider="petals",
+force_timeout=120,
+)
 # Add any assertions here to check the response
 print(response)
 except Exception as e:
 pytest.fail(f"Error occurred: {e}")


 # def test_baseten_falcon_7bcompletion():
 # model_name = "qvv0xeq"
 # try:
@@ -290,7 +376,6 @@ def test_petals():
 # pytest.fail(f"Error occurred: {e}")


 #### Test A121 ###################
 # def test_completion_ai21():
 # model_name = "j2-light"
@@ -333,4 +418,3 @@ def test_petals():
 # return

 # test_completion_together_ai_stream()
@@ -1,14 +1,21 @@
 import sys, os
 import traceback
 from dotenv import load_dotenv

 load_dotenv()
 import os
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import completion


 def logging_fn(model_call_dict):
 print(f"model call details: {model_call_dict}")


 models = ["gorilla-7b-hf-v1", "gpt-4"]
 custom_llm_provider = None
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -17,4 +24,10 @@ for model in models: # iterate through list
 if model == "gorilla-7b-hf-v1":
 custom_llm_provider = "custom_openai"
 custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
-completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, logger_fn=logging_fn)
+completion(
+model=model,
+messages=messages,
+custom_llm_provider=custom_llm_provider,
+custom_api_base=custom_api_base,
+logger_fn=logging_fn,
+)
@@ -1,9 +1,10 @@

 import sys, os
 import traceback
 import pytest

-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
 from infisical import InfisicalClient
@@ -11,9 +12,12 @@ from infisical import InfisicalClient
 # # litellm.set_verbose = True
 # litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])


 def test_openai_embedding():
 try:
-response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
+response = embedding(
+model="text-embedding-ada-002", input=["good morning from litellm"]
+)
 # Add any assertions here to check the response
 print(f"response: {str(response)}")
 except Exception as e:
@@ -2,9 +2,20 @@
 import os
 import sys
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
-from litellm import embedding, completion, AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
+from litellm import (
+embedding,
+completion,
+AuthenticationError,
+InvalidRequestError,
+RateLimitError,
+ServiceUnavailableError,
+OpenAIError,
+)
 from concurrent.futures import ThreadPoolExecutor
 import pytest

@@ -23,6 +34,8 @@ litellm.failure_callback = ["sentry"]
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
 test_model = "claude-instant-1"
 models = ["claude-instant-1"]


 def logging_fn(model_call_dict):
 if "model" in model_call_dict:
 print(f"model_call_dict: {model_call_dict['model']}")
@@ -38,7 +51,12 @@ def test_context_window(model):
 try:
 model = "chatgpt-test"
 print(f"model: {model}")
-response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
+response = completion(
+model=model,
+messages=messages,
+custom_llm_provider="azure",
+logger_fn=logging_fn,
+)
 print(f"response: {response}")
 except InvalidRequestError as e:
 print(f"InvalidRequestError: {e.llm_provider}")
@@ -52,8 +70,11 @@ def test_context_window(model):
 print(f"Uncaught Exception - {e}")
 pytest.fail(f"Error occurred: {e}")
 return


 test_context_window(test_model)


 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
 def invalid_auth(model): # set the model key to an invalid key, depending on the model
@@ -74,15 +95,22 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
 elif model == "command-nightly":
 temporary_key = os.environ["COHERE_API_KEY"]
 os.environ["COHERE_API_KEY"] = "bad-key"
-elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+elif (
+model
+== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
+):
 temporary_key = os.environ["REPLICATE_API_KEY"]
 os.environ["REPLICATE_API_KEY"] = "bad-key"
 print(f"model: {model}")
-response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+response = completion(
+model=model, messages=messages, custom_llm_provider=custom_llm_provider
+)
 print(f"response: {response}")
 except AuthenticationError as e:
 print(f"AuthenticationError Caught Exception - {e.llm_provider}")
-except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+except (
+OpenAIError
+): # is at least an openai error -> in case of random model errors - e.g. overloaded server
 print(f"OpenAIError Caught Exception - {e}")
 except Exception as e:
 print(type(e))
@@ -99,9 +127,14 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
 os.environ["ANTHROPIC_API_KEY"] = temporary_key
 elif model == "command-nightly":
 os.environ["COHERE_API_KEY"] = temporary_key
-elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+elif (
+model
+== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
+):
 os.environ["REPLICATE_API_KEY"] = temporary_key
 return


 invalid_auth(test_model)
 # # Test 3: Rate Limit Errors
 # def test_model(model):
@@ -142,5 +175,3 @@ invalid_auth(test_model)

 # accuracy_score = counts[True]/(counts[True] + counts[False])
 # print(f"accuracy_score: {accuracy_score}")
@@ -5,7 +5,9 @@ import sys, os
 import traceback
 import pytest

-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion

@@ -18,7 +20,11 @@ messages = [{ "content": user_message,"role": "user"}]


 # openai call
-response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+response = completion(
+model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
+)

 # cohere call
-response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}])
+response = completion(
+model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]
+)

@@ -1,6 +1,9 @@
 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import load_test_model, testing_batch_completion

@@ -16,7 +19,19 @@ from litellm import load_test_model, testing_batch_completion
 # print(result)

 ## Quality Test across Model
-models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}]
-messages = [[{"role": "user", "content": "What is your name?"}], [{"role": "user", "content": "Hey, how's it going?"}]]
+models = [
+"gpt-3.5-turbo",
+"gpt-3.5-turbo-16k",
+"gpt-4",
+"claude-instant-1",
+{
+"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
+"custom_llm_provider": "replicate",
+},
+]
+messages = [
+[{"role": "user", "content": "What is your name?"}],
+[{"role": "user", "content": "Hey, how's it going?"}],
+]
 result = testing_batch_completion(models=models, messages=messages)
 print(result)
@@ -3,7 +3,10 @@

 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion

@@ -11,9 +14,11 @@ litellm.set_verbose = False

 score = 0


 def logger_fn(model_call_object: dict):
 print(f"model call details: {model_call_object}")


 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]

@@ -27,7 +32,9 @@ except:

 # test on non-openai completion call
 try:
-response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+response = completion(
+model="claude-instant-1", messages=messages, logger_fn=logger_fn
+)
 print(f"claude response: {response}")
 score += 1
 except:

@@ -3,7 +3,10 @@

 import sys, os
 import traceback
-sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+sys.path.insert(
+0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
@ -4,7 +4,10 @@
|
||||||
|
|
||||||
import sys, os
|
import sys, os
|
||||||
import traceback
|
import traceback
|
||||||
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import embedding, completion
|
from litellm import embedding, completion
|
||||||
|
|
||||||
|
|
|
@@ -53,7 +53,6 @@

# # # return this generator to the client for streaming requests

# # async def get_response():
# # global generator
# # async for elem in generator:

@@ -12,7 +12,6 @@

# import asyncio

# user_message = "respond in 20 words. who are you?"
# messages = [{ "content": user_message,"role": "user"}]

@@ -45,8 +44,3 @@

# pytest.fail(f"Error occurred: {e}")

# test_completion_ollama_stream()
@@ -4,7 +4,10 @@

import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient

@@ -28,5 +31,5 @@ def test_completion_openai():

        pytest.fail(f"Error occurred: {e}")
    litellm.secret_manager_client = None


test_completion_openai()
@@ -3,7 +3,10 @@

import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import completion

@@ -11,17 +14,21 @@ litellm.set_verbose = False

score = 0


def logger_fn(model_call_object: dict):
    print(f"model call details: {model_call_object}")


user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

# test on anthropic completion call
try:
    response = completion(
        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
    )
    for chunk in response:
        print(chunk["choices"][0]["delta"])
    score += 1
except:
    print(f"error occurred: {traceback.format_exc()}")

@@ -30,9 +37,16 @@ except:

# test on anthropic completion call
try:
    response = completion(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=messages,
        custom_llm_provider="huggingface",
        custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud",
        stream=True,
        logger_fn=logger_fn,
    )
    for chunk in response:
        print(chunk["choices"][0]["delta"])
    score += 1
except:
    print(f"error occurred: {traceback.format_exc()}")
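The streaming tests above print each delta as it arrives; a common follow-on step is stitching the deltas back into one string. A small, illustrative sketch using the same chunk["choices"][0]["delta"] access pattern; the delta's exact shape can vary by provider, so it is handled defensively here.

# Illustrative only: accumulating streamed deltas into a full reply.
from litellm import completion

messages = [{"role": "user", "content": "Hello, how are you?"}]
response = completion(model="claude-instant-1", messages=messages, stream=True)

full_text = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]  # same access pattern as the tests above
    # delta may be a dict with a "content" field or already a string, depending on provider
    full_text += delta.get("content", "") if isinstance(delta, dict) else str(delta)

print(full_text)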
@@ -3,10 +3,14 @@

import sys, os
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import time
from litellm import timeout


@timeout(10)
def stop_after_10_s(force_timeout=60):
    print("Stopping after 10 seconds")
@@ -11,9 +11,7 @@ from threading import Thread

from openai.error import Timeout


def timeout(timeout_duration: float = None, exception_to_raise=Timeout):
    """
    Wraps a function to raise the specified exception if execution time
    is greater than the specified timeout.

@@ -44,7 +42,9 @@ def timeout(

                result = future.result(timeout=local_timeout_duration)
            except futures.TimeoutError:
                thread.stop_loop()
                raise exception_to_raise(
                    f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
                )
            thread.stop_loop()
            return result

@@ -59,7 +59,9 @@ def timeout(

                )
                return value
            except asyncio.TimeoutError:
                raise exception_to_raise(
                    f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
                )

        if iscoroutinefunction(func):
            return async_wrapper
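A small usage sketch of the timeout decorator shown above: the wrapped call gets the configured number of seconds before the default exception (openai.error.Timeout, per the signature above) is raised. Purely illustrative.

# Illustrative only: wrapping a deliberately slow function with litellm's timeout decorator.
import time

from litellm import timeout
from openai.error import Timeout


@timeout(2)  # allow roughly two seconds before raising
def slow_call():
    time.sleep(10)
    return "done"


try:
    slow_call()
except Timeout:
    print("call was cut off by the timeout decorator")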
509  litellm/utils.py
@@ -5,6 +5,7 @@ import litellm, openai

import random, uuid, requests
import datetime, time
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
import pkg_resources
from .integrations.helicone import HeliconeLogger

@@ -13,8 +14,15 @@ from .integrations.berrispend import BerriSpendLogger

from .integrations.supabase import Supabase
from openai.error import OpenAIError as OriginalError
from openai.openai_object import OpenAIObject
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
)
from typing import List, Dict, Union

####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv()  # Loading env variables using dotenv
sentry_sdk_instance = None
@@ -51,12 +59,14 @@ local_cache = {}

# 'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}
# }


class Message(OpenAIObject):
    def __init__(self, content="default", role="assistant", **params):
        super(Message, self).__init__(**params)
        self.content = content
        self.role = role


class Choices(OpenAIObject):
    def __init__(self, finish_reason="stop", index=0, message=Message(), **params):
        super(Choices, self).__init__(**params)

@@ -64,22 +74,29 @@ class Choices(OpenAIObject):

        self.index = index
        self.message = message


class ModelResponse(OpenAIObject):
    def __init__(self, choices=None, created=None, model=None, usage=None, **params):
        super(ModelResponse, self).__init__(**params)
        self.choices = choices if choices else [Choices()]
        self.created = created
        self.model = model
        self.usage = (
            usage
            if usage
            else {
                "prompt_tokens": None,
                "completion_tokens": None,
                "total_tokens": None,
            }
        )

    def to_dict_recursive(self):
        d = super().to_dict_recursive()
        d["choices"] = [choice.to_dict_recursive() for choice in self.choices]
        return d


############################################################
def print_verbose(print_statement):
    if litellm.set_verbose:
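A short, purely illustrative sketch of the response wrapper defined above: a ModelResponse holds Choices, each wrapping a Message, plus a usage dict. Field values here are made up.

# Illustrative only: constructing a ModelResponse by hand to show its shape.
response = ModelResponse(
    choices=[Choices(message=Message(content="Hi there", role="assistant"))],
    created=1692000000,
    model="gpt-3.5-turbo",
    usage={"prompt_tokens": 10, "completion_tokens": 3, "total_tokens": 13},
)

print(response.choices[0].message.content)  # -> "Hi there"
print(response.to_dict_recursive())  # plain-dict view, as defined above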
@@ -87,9 +104,12 @@ def print_verbose(print_statement):

        if random.random() <= 0.3:
            print("Get help - https://discord.com/invite/wuPM9dRgDw")


####### Package Import Handler ###################
import importlib
import subprocess


def install_and_import(package: str):
    if package in globals().keys():
        print_verbose(f"{package} has already been imported.")

@@ -108,11 +128,22 @@ def install_and_import(package: str):

    finally:
        if package not in globals().keys():
            globals()[package] = importlib.import_module(package)


##################################################


####### LOGGING ###################
# Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(
    model=None,
    input=None,
    custom_llm_provider=None,
    azure=False,
    additional_args={},
    logger_fn=None,
    exception=None,
):
    try:
        model_call_details = {}
        if model:
@@ -130,7 +161,12 @@ def logging(model=None, input=None, custom_llm_provider=None, azure=False, addit

        model_call_details["additional_args"] = additional_args
        # log additional call details -> api key, etc.
        if model:
            if (
                azure == True
                or model in litellm.open_ai_chat_completion_models
                or model in litellm.open_ai_chat_completion_models
                or model in litellm.open_ai_embedding_models
            ):
                model_call_details["api_type"] = openai.api_type
                model_call_details["api_base"] = openai.api_base
                model_call_details["api_version"] = openai.api_version

@@ -142,25 +178,42 @@ def logging(model=None, input=None, custom_llm_provider=None, azure=False, addit

            elif model in litellm.cohere_models:
                model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
        ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
        print_verbose(
            f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}"
        )
        if logger_fn and callable(logger_fn):
            try:
                logger_fn(
                    model_call_details
                )  # Expectation: any logger function passed in by the user should accept a dict object
            except Exception as e:
                print(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
                )
    except Exception as e:
        print(
            f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
        )
        pass


####### CLIENT ###################
# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
def client(original_function):
    def function_setup(
        *args, **kwargs
    ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
        try:
            global callback_list, add_breadcrumb, user_logger_fn
            if (
                len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
            ) and len(callback_list) == 0:
                callback_list = list(
                    set(litellm.success_callback + litellm.failure_callback)
                )
                set_callbacks(
                    callback_list=callback_list,
                )
            if add_breadcrumb:
                add_breadcrumb(
                    category="litellm.llm_call",
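As the wrapper above expects, a user-supplied logger_fn is just a callable that receives one dict of call details. A minimal, illustrative callback; the keys accessed are ones the logging() helper above populates, and .get is used in case a key is absent.

# Illustrative only: a user-defined logger_fn that completion() calls with a dict.
def my_logger_fn(model_call_details: dict):
    print(f"model: {model_call_details.get('model')}")
    print(f"additional args: {model_call_details.get('additional_args')}")


# Hypothetical usage, mirroring the test files earlier in this diff:
# completion(model="gpt-3.5-turbo", messages=messages, logger_fn=my_logger_fn)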
@@ -178,8 +231,16 @@ def client(original_function):

        try:
            model = args[0] if len(args) > 0 else kwargs["model"]
            exception = kwargs["exception"] if "exception" in kwargs else None
            custom_llm_provider = (
                kwargs["custom_llm_provider"]
                if "custom_llm_provider" in kwargs
                else None
            )
            safe_crash_reporting(
                model=model,
                exception=exception,
                custom_llm_provider=custom_llm_provider,
            )  # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
        except:
            # [Non-Blocking Error]
            pass

@@ -199,7 +260,9 @@ def client(original_function):

    def check_cache(*args, **kwargs):
        try:  # never block execution
            prompt = get_prompt(*args, **kwargs)
            if (
                prompt != None and prompt in local_cache
            ):  # check if messages / prompt exists
                result = local_cache[prompt]
                return result
            else:

@@ -221,7 +284,10 @@ def client(original_function):

            function_setup(*args, **kwargs)
            ## MODEL CALL
            start_time = datetime.datetime.now()
            if (
                litellm.caching
                and (cached_result := check_cache(*args, **kwargs)) is not None
            ):
                result = cached_result
            else:
                result = original_function(*args, **kwargs)

@@ -231,26 +297,35 @@ def client(original_function):

                add_cache(result, *args, **kwargs)
            ## LOG SUCCESS
            crash_reporting(*args, **kwargs)
            my_thread = threading.Thread(
                target=handle_success, args=(args, kwargs, result, start_time, end_time)
            )  # don't interrupt execution of main thread
            my_thread.start()
            return result
        except Exception as e:
            traceback_exception = traceback.format_exc()
            crash_reporting(*args, **kwargs, exception=traceback_exception)
            end_time = datetime.datetime.now()
            my_thread = threading.Thread(
                target=handle_failure,
                args=(e, traceback_exception, start_time, end_time, args, kwargs),
            )  # don't interrupt execution of main thread
            my_thread.start()
            raise e

    return wrapper


####### USAGE CALCULATOR ################


def token_counter(model, text):
    # use tiktoken or anthropic's tokenizer depending on the model
    num_tokens = 0
    if "claude" in model:
        install_and_import("anthropic")
        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT

        anthropic = Anthropic()
        num_tokens = anthropic.count_tokens(text)
    else:
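A hedged sketch of the caching path the wrapper above implements: with litellm.caching turned on, an identical prompt is looked up in local_cache before the provider is called again. Illustrative only; cache keying follows check_cache/add_cache above.

# Illustrative only: exercising the in-memory cache in the client wrapper above.
import litellm
from litellm import completion

litellm.caching = True  # enables the check_cache()/add_cache() branch

messages = [{"role": "user", "content": "What is the capital of France?"}]

first = completion(model="gpt-3.5-turbo", messages=messages)   # real provider call
second = completion(model="gpt-3.5-turbo", messages=messages)  # served from local_cache
print(second)  # should match the first response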
@@ -264,8 +339,12 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens =

    completion_tokens_cost_usd_dollar = 0
    model_cost_ref = litellm.model_cost
    if model in model_cost_ref:
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    else:
        # calculate average input cost

@@ -285,9 +364,12 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens =

def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""):
    prompt_tokens = token_counter(model=model, text=prompt)
    completion_tokens = token_counter(model=model, text=completion)
    prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
        model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
    )
    return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
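A worked example of the cost math above. Assuming illustrative per-token prices of 0.0000015 USD for input and 0.000002 USD for output on gpt-3.5-turbo, 18 prompt tokens and 23 completion tokens come to 18 * 0.0000015 + 23 * 0.000002 = 0.000073 USD. The sketch drives cost_per_token directly with those token counts.

# Illustrative only: token counts are supplied directly; prices come from litellm.model_cost.
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo", prompt_tokens=18, completion_tokens=23
)
# Named completion_cost_usd to avoid shadowing the completion_cost() helper above.
print(f"prompt cost:     ${prompt_cost:.6f}")
print(f"completion cost: ${completion_cost_usd:.6f}")
print(f"total cost:      ${prompt_cost + completion_cost_usd:.6f}")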
####### HELPER FUNCTIONS ################
def get_litellm_params(
    return_async=False,

@@ -300,7 +382,7 @@ def get_litellm_params(

    replicate=False,
    together_ai=False,
    custom_llm_provider=None,
    custom_api_base=None,
):
    litellm_params = {
        "return_async": return_async,

@@ -309,7 +391,7 @@ def get_litellm_params(

        "logger_fn": logger_fn,
        "verbose": verbose,
        "custom_llm_provider": custom_llm_provider,
        "custom_api_base": custom_api_base,
    }

    return litellm_params

@@ -324,7 +406,7 @@ def get_optional_params(

    n=1,
    stream=False,
    stop=None,
    max_tokens=float("inf"),
    presence_penalty=0,
    frequency_penalty=0,
    logit_bias={},

@@ -352,7 +434,7 @@ def get_optional_params(

        optional_params["stream"] = stream
        if temperature != 1:
            optional_params["temperature"] = temperature
        if max_tokens != float("inf"):
            optional_params["max_tokens"] = max_tokens
        return optional_params
    elif custom_llm_provider == "replicate":

@@ -368,16 +450,18 @@ def get_optional_params(

            optional_params["temperature"] = temperature
        if top_p != 1:
            optional_params["top_p"] = top_p
        if max_tokens != float("inf"):
            optional_params["max_tokens"] = max_tokens
        if frequency_penalty != 0:
            optional_params["frequency_penalty"] = frequency_penalty
    elif (
        model == "chat-bison"
    ):  # chat-bison has diff args from chat-bison@001 ty Google
        if temperature != 1:
            optional_params["temperature"] = temperature
        if top_p != 1:
            optional_params["top_p"] = top_p
        if max_tokens != float("inf"):
            optional_params["max_output_tokens"] = max_tokens
    elif model in litellm.vertex_text_models:
        # required params for all text vertex calls

@@ -402,7 +486,7 @@ def get_optional_params(

        optional_params["stream"] = stream
        if stop != None:
            optional_params["stop"] = stop
        if max_tokens != float("inf"):
            optional_params["max_tokens"] = max_tokens
        if presence_penalty != 0:
            optional_params["presence_penalty"] = presence_penalty

@@ -417,7 +501,15 @@ def get_optional_params(

        return optional_params
    return optional_params


def load_test_model(
    model: str,
    custom_llm_provider: str = None,
    custom_api_base: str = None,
    prompt: str = None,
    num_calls: int = None,
    force_timeout: int = None,
):
    test_prompt = "Hey, how's it going"
    test_calls = 100
    if prompt:
@@ -427,14 +519,31 @@ def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base

    messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
    start_time = time.time()
    try:
        litellm.batch_completion(
            model=model,
            messages=messages,
            custom_llm_provider=custom_llm_provider,
            custom_api_base=custom_api_base,
            force_timeout=force_timeout,
        )
        end_time = time.time()
        response_time = end_time - start_time
        return {
            "total_response_time": response_time,
            "calls_made": 100,
            "status": "success",
            "exception": None,
        }
    except Exception as e:
        end_time = time.time()
        response_time = end_time - start_time
        return {
            "total_response_time": response_time,
            "calls_made": 100,
            "status": "failed",
            "exception": e,
        }
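A usage sketch for the load-test helper above: by default it builds 100 copies of a canned prompt, runs them through litellm.batch_completion, and reports total wall-clock time plus success or failure. Argument values here are illustrative.

# Illustrative only: a quick load test against a single model.
stats = load_test_model(model="gpt-3.5-turbo", num_calls=100, force_timeout=120)
print(stats["status"], stats["total_response_time"], stats["calls_made"])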
def set_callbacks(callback_list):
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient

@@ -445,11 +554,20 @@ def set_callbacks(callback_list):

                import sentry_sdk
            except ImportError:
                print_verbose("Package 'sentry_sdk' is missing. Installing it...")
                subprocess.check_call(
                    [sys.executable, "-m", "pip", "install", "sentry_sdk"]
                )
                import sentry_sdk
            sentry_sdk_instance = sentry_sdk
            sentry_trace_rate = (
                os.environ.get("SENTRY_API_TRACE_RATE")
                if "SENTRY_API_TRACE_RATE" in os.environ
                else "1.0"
            )
            sentry_sdk_instance.init(
                dsn=os.environ.get("SENTRY_API_URL"),
                traces_sample_rate=float(sentry_trace_rate),
            )
            capture_exception = sentry_sdk_instance.capture_exception
            add_breadcrumb = sentry_sdk_instance.add_breadcrumb
        elif callback == "posthog":

@@ -457,21 +575,26 @@ def set_callbacks(callback_list):

                from posthog import Posthog
            except ImportError:
                print_verbose("Package 'posthog' is missing. Installing it...")
                subprocess.check_call(
                    [sys.executable, "-m", "pip", "install", "posthog"]
                )
                from posthog import Posthog
            posthog = Posthog(
                project_api_key=os.environ.get("POSTHOG_API_KEY"),
                host=os.environ.get("POSTHOG_API_URL"),
            )
        elif callback == "slack":
            try:
                from slack_bolt import App
            except ImportError:
                print_verbose("Package 'slack_bolt' is missing. Installing it...")
                subprocess.check_call(
                    [sys.executable, "-m", "pip", "install", "slack_bolt"]
                )
                from slack_bolt import App
            slack_app = App(
                token=os.environ.get("SLACK_API_TOKEN"),
                signing_secret=os.environ.get("SLACK_API_SECRET"),
            )
            alerts_channel = os.environ["SLACK_API_CHANNEL"]
            print_verbose(f"Initialized Slack App: {slack_app}")
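A configuration sketch for the callback machinery above: callbacks are chosen by name on litellm.success_callback / litellm.failure_callback, and set_callbacks() then initializes the matching client from environment variables (POSTHOG_API_KEY, SLACK_API_TOKEN, and so on, as read above). Values below are placeholders.

# Illustrative only: selecting callbacks by name; credentials are placeholders.
import os
import litellm

os.environ["POSTHOG_API_KEY"] = "<posthog-project-key>"
os.environ["POSTHOG_API_URL"] = "https://app.posthog.com"
# Slack additionally needs SLACK_API_TOKEN, SLACK_API_SECRET and SLACK_API_CHANNEL.

litellm.success_callback = ["posthog"]
litellm.failure_callback = ["posthog", "slack"]

# The first completion() call after this triggers function_setup(), which passes
# the union of both lists to set_callbacks().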
@@ -496,10 +619,11 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

        success_handler = additional_details.pop("success_handler", None)
        failure_handler = additional_details.pop("failure_handler", None)

        additional_details["Event_Name"] = additional_details.pop(
            "failed_event_name", "litellm.failed_query"
        )
        print_verbose(f"self.failure_callback: {litellm.failure_callback}")

        # print_verbose(f"additional_details: {additional_details}")
        for callback in litellm.failure_callback:
            try:

@@ -514,11 +638,15 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

                    for detail in additional_details:
                        slack_msg += f"{detail}: {additional_details[detail]}\n"
                    slack_msg += f"Traceback: {traceback_exception}"
                    slack_app.client.chat_postMessage(
                        channel=alerts_channel, text=slack_msg
                    )
                elif callback == "sentry":
                    capture_exception(exception)
                elif callback == "posthog":
                    print_verbose(
                        f"inside posthog, additional_details: {len(additional_details.keys())}"
                    )
                    ph_obj = {}
                    if len(kwargs) > 0:
                        ph_obj = kwargs

@@ -531,7 +659,9 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

                    print_verbose(f"ph_obj: {ph_obj}")
                    print_verbose(f"PostHog Event Name: {event_name}")
                    if "user_id" in additional_details:
                        posthog.capture(
                            additional_details["user_id"], event_name, ph_obj
                        )
                    else:  # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                        unique_id = str(uuid.uuid4())
                        posthog.capture(unique_id, event_name)

@@ -545,11 +675,20 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

                        "created": time.time(),
                        "error": traceback_exception,
                        "usage": {
                            "prompt_tokens": prompt_token_calculator(
                                model, messages=messages
                            ),
                            "completion_tokens": 0,
                        },
                    }
                    berrispendLogger.log_event(
                        model=model,
                        messages=messages,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                elif callback == "aispend":
                    print_verbose("reaches aispend for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]

@@ -558,11 +697,19 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

                        "model": model,
                        "created": time.time(),
                        "usage": {
                            "prompt_tokens": prompt_token_calculator(
                                model, messages=messages
                            ),
                            "completion_tokens": 0,
                        },
                    }
                    aispendLogger.log_event(
                        model=model,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                elif callback == "supabase":
                    print_verbose("reaches supabase for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]

@@ -572,21 +719,33 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

                        "created": time.time(),
                        "error": traceback_exception,
                        "usage": {
                            "prompt_tokens": prompt_token_calculator(
                                model, messages=messages
                            ),
                            "completion_tokens": 0,
                        },
                    }
                    print(f"litellm._thread_context: {litellm._thread_context}")
                    supabaseClient.log_event(
                        model=model,
                        messages=messages,
                        end_user=litellm._thread_context.user,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )

            except:
                print_verbose(
                    f"Error Occurred while logging failure: {traceback.format_exc()}"
                )
                pass

        if failure_handler and callable(failure_handler):
            call_details = {
                "exception": exception,
                "additional_details": additional_details,
            }
            failure_handler(call_details)
        pass
@@ -595,12 +754,15 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k

        logging(logger_fn=user_logger_fn, exception=e)
        pass


def handle_success(args, kwargs, result, start_time, end_time):
    global heliconeLogger, aispendLogger
    try:
        success_handler = additional_details.pop("success_handler", None)
        failure_handler = additional_details.pop("failure_handler", None)
        additional_details["Event_Name"] = additional_details.pop(
            "successful_event_name", "litellm.succes_query"
        )
        for callback in litellm.success_callback:
            try:
                if callback == "posthog":

@@ -609,7 +771,9 @@ def handle_success(args, kwargs, result, start_time, end_time):

                        ph_obj[detail] = additional_details[detail]
                    event_name = additional_details["Event_Name"]
                    if "user_id" in additional_details:
                        posthog.capture(
                            additional_details["user_id"], event_name, ph_obj
                        )
                    else:  # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                        unique_id = str(uuid.uuid4())
                        posthog.capture(unique_id, event_name, ph_obj)

@@ -618,31 +782,63 @@ def handle_success(args, kwargs, result, start_time, end_time):

                    slack_msg = ""
                    for detail in additional_details:
                        slack_msg += f"{detail}: {additional_details[detail]}\n"
                    slack_app.client.chat_postMessage(
                        channel=alerts_channel, text=slack_msg
                    )
                elif callback == "helicone":
                    print_verbose("reaches helicone for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]
                    messages = args[1] if len(args) > 1 else kwargs["messages"]
                    heliconeLogger.log_success(
                        model=model,
                        messages=messages,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                elif callback == "aispend":
                    print_verbose("reaches aispend for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]
                    aispendLogger.log_event(
                        model=model,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                elif callback == "berrispend":
                    print_verbose("reaches berrispend for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]
                    messages = args[1] if len(args) > 1 else kwargs["messages"]
                    berrispendLogger.log_event(
                        model=model,
                        messages=messages,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                elif callback == "supabase":
                    print_verbose("reaches supabase for logging!")
                    model = args[0] if len(args) > 0 else kwargs["model"]
                    messages = args[1] if len(args) > 1 else kwargs["messages"]
                    print(f"litellm._thread_context: {litellm._thread_context}")
                    supabaseClient.log_event(
                        model=model,
                        messages=messages,
                        end_user=litellm._thread_context.user,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
            except Exception as e:
                ## LOGGING
                logging(logger_fn=user_logger_fn, exception=e)
                print_verbose(
                    f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
                )
                pass

        if success_handler and callable(success_handler):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging(logger_fn=user_logger_fn, exception=e)
|
logging(logger_fn=user_logger_fn, exception=e)
|
||||||
print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
|
print_verbose(
|
||||||
|
f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
|
||||||
|
)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def prompt_token_calculator(model, messages):
|
def prompt_token_calculator(model, messages):
|
||||||
# use tiktoken or anthropic's tokenizer depending on the model
|
# use tiktoken or anthropic's tokenizer depending on the model
|
||||||
text = " ".join(message["content"] for message in messages)
|
text = " ".join(message["content"] for message in messages)
|
||||||
num_tokens = 0
|
num_tokens = 0
|
||||||
if "claude" in model:
|
if "claude" in model:
|
||||||
install_and_import('anthropic')
|
install_and_import("anthropic")
|
||||||
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||||
|
|
||||||
anthropic = Anthropic()
|
anthropic = Anthropic()
|
||||||
num_tokens = anthropic.count_tokens(text)
|
num_tokens = anthropic.count_tokens(text)
|
||||||
else:
|
else:
|
||||||
num_tokens = len(encoding.encode(text))
|
num_tokens = len(encoding.encode(text))
|
||||||
return num_tokens
|
return num_tokens
|
||||||
|
|
||||||
|
|
||||||
# integration helper function
|
# integration helper function
|
||||||
def modify_integration(integration_name, integration_params):
|
def modify_integration(integration_name, integration_params):
|
||||||
global supabaseClient
|
global supabaseClient
|
||||||
|
@ -674,6 +875,7 @@ def modify_integration(integration_name, integration_params):
|
||||||
if "table_name" in integration_params:
|
if "table_name" in integration_params:
|
||||||
Supabase.supabase_table_name = integration_params["table_name"]
|
Supabase.supabase_table_name = integration_params["table_name"]
|
||||||
|
|
||||||
|
|
||||||
def exception_type(model, original_exception, custom_llm_provider):
|
def exception_type(model, original_exception, custom_llm_provider):
|
||||||
global user_logger_fn
|
global user_logger_fn
|
||||||
exception_mapping_worked = False
|
exception_mapping_worked = False
|
||||||
|
@@ -692,80 +894,153 @@ def exception_type(model, original_exception, custom_llm_provider):

            exception_type = type(original_exception).__name__
        else:
            exception_type = ""
        logging(
            model=model,
            additional_args={
                "error_str": error_str,
                "exception_type": exception_type,
                "original_exception": original_exception,
            },
            logger_fn=user_logger_fn,
        )
        if "claude" in model:  # one of the anthropics
            if hasattr(original_exception, "status_code"):
                print_verbose(f"status_code: {original_exception.status_code}")
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
                    raise AuthenticationError(
                        message=f"AnthropicException - {original_exception.message}",
                        llm_provider="anthropic",
                    )
                elif original_exception.status_code == 400:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"AnthropicException - {original_exception.message}",
                        model=model,
                        llm_provider="anthropic",
                    )
                elif original_exception.status_code == 429:
                    exception_mapping_worked = True
                    raise RateLimitError(
                        message=f"AnthropicException - {original_exception.message}",
                        llm_provider="anthropic",
                    )
            elif (
                "Could not resolve authentication method. Expected either api_key or auth_token to be set."
                in error_str
            ):
                exception_mapping_worked = True
                raise AuthenticationError(
                    message=f"AnthropicException - {original_exception.message}",
                    llm_provider="anthropic",
                )
        elif "replicate" in model:
            if "Incorrect authentication token" in error_str:
                exception_mapping_worked = True
                raise AuthenticationError(
                    message=f"ReplicateException - {error_str}",
                    llm_provider="replicate",
                )
            elif exception_type == "ModelError":
                exception_mapping_worked = True
                raise InvalidRequestError(
                    message=f"ReplicateException - {error_str}",
                    model=model,
                    llm_provider="replicate",
                )
            elif "Request was throttled" in error_str:
                exception_mapping_worked = True
                raise RateLimitError(
                    message=f"ReplicateException - {error_str}",
                    llm_provider="replicate",
                )
            elif (
                exception_type == "ReplicateError"
            ):  ## ReplicateError implies an error on Replicate server side, not user side
                raise ServiceUnavailableError(
                    message=f"ReplicateException - {error_str}",
                    llm_provider="replicate",
                )
        elif model == "command-nightly":  # Cohere
            if (
                "invalid api token" in error_str
                or "No API key provided." in error_str
            ):
                exception_mapping_worked = True
                raise AuthenticationError(
                    message=f"CohereException - {original_exception.message}",
                    llm_provider="cohere",
                )
            elif "too many tokens" in error_str:
                exception_mapping_worked = True
                raise InvalidRequestError(
                    message=f"CohereException - {original_exception.message}",
                    model=model,
                    llm_provider="cohere",
                )
            elif (
                "CohereConnectionError" in exception_type
            ):  # cohere seems to fire these errors when we load test it (1k+ messages / min)
                exception_mapping_worked = True
                raise RateLimitError(
                    message=f"CohereException - {original_exception.message}",
                    llm_provider="cohere",
                )
        elif custom_llm_provider == "huggingface":
            if hasattr(original_exception, "status_code"):
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
                    raise AuthenticationError(
                        message=f"HuggingfaceException - {original_exception.message}",
                        llm_provider="huggingface",
                    )
                elif original_exception.status_code == 400:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"HuggingfaceException - {original_exception.message}",
                        model=model,
                        llm_provider="huggingface",
                    )
                elif original_exception.status_code == 429:
                    exception_mapping_worked = True
                    raise RateLimitError(
                        message=f"HuggingfaceException - {original_exception.message}",
                        llm_provider="huggingface",
                    )
            raise original_exception  # base case - return the original exception
        else:
            raise original_exception
    except Exception as e:
        ## LOGGING
        logging(
            logger_fn=user_logger_fn,
            additional_args={
                "exception_mapping_worked": exception_mapping_worked,
                "original_exception": original_exception,
            },
            exception=e,
        )
|
||||||
|
additional_args={
|
||||||
|
"exception_mapping_worked": exception_mapping_worked,
|
||||||
|
"original_exception": original_exception,
|
||||||
|
},
|
||||||
|
exception=e,
|
||||||
|
)
|
||||||
if exception_mapping_worked:
|
if exception_mapping_worked:
|
||||||
raise e
|
raise e
|
||||||
else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
|
else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
|
||||||
|
|
||||||
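Taken together, the mapping above is what lets a caller handle one set of litellm exception types regardless of provider. A minimal call-site sketch, assuming these exception classes are exported at the package top level as the raises above suggest; the model name and handling are illustrative:

import litellm
from litellm import AuthenticationError, InvalidRequestError, RateLimitError

try:
    response = litellm.completion(
        model="claude-instant-1",  # illustrative model choice
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
except AuthenticationError:
    print("bad or missing API key")  # e.g. mapped from Anthropic's 401 / Cohere's invalid token
except RateLimitError:
    print("rate limited - back off and retry")  # e.g. mapped from a provider 429
except InvalidRequestError as e:
    print(f"bad request: {e}")  # e.g. mapped from a provider 400
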
def safe_crash_reporting(model=None, exception=None, custom_llm_provider=None):
    data = {
        "model": model,
        "exception": str(exception),
        "custom_llm_provider": custom_llm_provider,
    }
    threading.Thread(target=litellm_telemetry, args=(data,)).start()

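For reference, a hypothetical call site for this helper; the surrounding try/except and the exception itself are invented for illustration. Pushing the payload from a background thread keeps the re-raise from being delayed by telemetry:

try:
    raise TimeoutError("request to provider timed out")  # stand-in for a real provider failure
except Exception as e:
    safe_crash_reporting(model="gpt-3.5-turbo", exception=e, custom_llm_provider=None)
    raise  # the user still sees the original error; telemetry runs on its own thread
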
def litellm_telemetry(data):
    # Load or generate the UUID
    uuid_file = "litellm_uuid.txt"
    try:
        # Try to open the file and load the UUID
        with open(uuid_file, "r") as file:
            uuid_value = file.read()
            if uuid_value:
                uuid_value = uuid_value.strip()
@@ -775,7 +1050,7 @@ def litellm_telemetry(data):
        # Generate a new UUID if the file doesn't exist or is empty
        new_uuid = uuid.uuid4()
        uuid_value = str(new_uuid)
        with open(uuid_file, "w") as file:
            file.write(uuid_value)
    except:
        # [Non-Blocking Error]
@@ -784,17 +1059,22 @@ def litellm_telemetry(data):
    try:
        # Prepare the data to send to litellm logging api
        payload = {
            "uuid": uuid_value,
            "data": data,
            "version": pkg_resources.get_distribution("litellm").version,
        }
        # Make the POST request to litellm logging api
        response = requests.post(
            "https://litellm.berri.ai/logging",
            headers={"Content-Type": "application/json"},
            json=payload,
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
    except:
        # [Non-Blocking Error]
        return

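The UUID handling above is the usual anonymous-install-id pattern: reuse an id from a local file if present, otherwise mint one and persist it. A standalone sketch of just that part, assuming only that the file name stays litellm_uuid.txt:

import uuid


def load_or_create_install_id(path="litellm_uuid.txt"):
    # Reuse an existing id if the file is present and non-empty.
    try:
        with open(path, "r") as f:
            value = f.read().strip()
            if value:
                return value
    except FileNotFoundError:
        pass
    # Otherwise generate a fresh UUID and persist it for next time.
    value = str(uuid.uuid4())
    with open(path, "w") as f:
        f.write(value)
    return value
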
######### Secret Manager ############################
# checks if user has passed in a secret manager client
# if passed in then checks the secret there
@@ -812,6 +1092,7 @@ def get_secret(secret_name):
    else:
        return os.environ.get(secret_name)

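Only the environment-variable fallback of get_secret is visible in this hunk; the secret-manager branch is elided. A sketch of the overall shape under that assumption, with the client check and function name invented for illustration:

import os

secret_manager_client = None  # assumed module-level setting, normally supplied by the user


def get_secret_sketch(secret_name):
    # If a secret manager client were configured it would be consulted first
    # (that branch is elided in the hunk above); otherwise fall back to env vars.
    if secret_manager_client is not None:
        raise NotImplementedError("secret manager lookup not shown in this hunk")
    return os.environ.get(secret_name)
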
######## Streaming Class ############################
# wraps the completion stream to return the correct format for the model
# replicate/anthropic/cohere
@@ -831,8 +1112,8 @@ class CustomStreamWrapper:
        return self

    def handle_anthropic_chunk(self, chunk):
        str_line = chunk.decode("utf-8")  # Convert bytes to string
        if str_line.startswith("data:"):
            data_json = json.loads(str_line[5:])
            return data_json.get("completion", "")
        return ""
@@ -850,7 +1131,7 @@ class CustomStreamWrapper:

    def handle_huggingface_chunk(self, chunk):
        chunk = chunk.decode("utf-8")
        if chunk.startswith("data:"):
            data_json = json.loads(chunk[5:])
            if "token" in data_json and "text" in data_json["token"]:
                return data_json["token"]["text"]
@@ -882,11 +1163,11 @@ class CustomStreamWrapper:
        return {"choices": [{"delta": completion_obj}]}

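Both handlers above strip an SSE "data:" prefix and pull a single field out of the JSON payload. A quick self-contained check of that parsing logic; the sample bytes are invented, not captured from a real stream:

import json


def extract_completion(chunk: bytes) -> str:
    # Mirrors handle_anthropic_chunk: drop the "data:" prefix, parse the JSON,
    # and return the "completion" field if present.
    str_line = chunk.decode("utf-8")
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        return data_json.get("completion", "")
    return ""


print(extract_completion(b'data: {"completion": " Hello", "stop_reason": null}'))  # " Hello"
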
########## Reading Config File ############################
def read_config_args(config_path):
    try:
        import os

        current_path = os.getcwd()
        with open(config_path, "r") as config_file:
            config = json.load(config_file)
@@ -900,9 +1181,13 @@ def read_config_args(config_path):

########## ollama implementation ############################
import aiohttp


async def get_ollama_response_stream(
    api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"
):
    session = aiohttp.ClientSession()
    url = f"{api_base}/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
@@ -918,7 +1203,10 @@ async def get_ollama_response_stream(api_base="http://localhost:11434", model="l
            if chunk.strip() != "":
                j = json.loads(chunk)
                if "response" in j:
                    completion_obj = {
                        "role": "assistant",
                        "content": "",
                    }
                    completion_obj["content"] = j["response"]
                    yield {"choices": [{"delta": completion_obj}]}
                    # self.responses.append(j["response"])
@@ -939,7 +1227,7 @@ async def stream_to_string(generator):

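A quick way to exercise this streaming helper, assuming an Ollama server is actually listening on the default address (otherwise the connection simply fails):

import asyncio


async def main():
    # Consume the async generator above and print tokens as they arrive.
    async for chunk in get_ollama_response_stream(
        api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"
    ):
        print(chunk["choices"][0]["delta"]["content"], end="", flush=True)


if __name__ == "__main__":
    asyncio.run(main())
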
########## Together AI streaming #############################
async def together_ai_completion_streaming(json_data, headers):
    session = aiohttp.ClientSession()
    url = "https://api.together.xyz/inference"
    # headers = {
    #     'Authorization': f'Bearer {together_ai_token}',
    #     'Content-Type': 'application/json'
@@ -962,10 +1250,10 @@ async def together_ai_completion_streaming(json_data, headers):
                if line:
                    try:
                        json_chunk = line.decode("utf-8")
                        json_string = json_chunk.split("data: ")[1]
                        # Convert the JSON string to a dictionary
                        data_dict = json.loads(json_string)
                        completion_response = data_dict["choices"][0]["text"]
                        completion_obj = {"role": "assistant", "content": ""}
                        completion_obj["content"] = completion_response
                        yield {"choices": [{"delta": completion_obj}]}
@@ -973,4 +1261,3 @@ async def together_ai_completion_streaming(json_data, headers):
                        pass
            finally:
                await session.close()