import os, openai, cohere, replicate, sys
from typing import Any
from func_timeout import func_set_timeout, FunctionTimedOut
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import traceback
import dotenv
import litellm
from litellm import client, logging, exception_type
from litellm import success_callback, failure_callback
import random

####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv

def get_optional_params(
    # 12 optional params
    functions = [],
    function_call = "",
    temperature = 1,
    top_p = 1,
    n = 1,
    stream = False,
    stop = None,
    max_tokens = float('inf'),
    presence_penalty = 0,
    frequency_penalty = 0,
    logit_bias = {},
    user = "",
):
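    """Collect only the OpenAI optional params the caller changed from their defaults, so unset params are not forwarded."""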
    optional_params = {}
    if functions != []:
        optional_params["functions"] = functions
    if function_call != "":
        optional_params["function_call"] = function_call
    if temperature != 1:
        optional_params["temperature"] = temperature
    if top_p != 1:
        optional_params["top_p"] = top_p
    if n != 1:
        optional_params["n"] = n
    if stream:
        optional_params["stream"] = stream
    if stop is not None:
        optional_params["stop"] = stop
    if max_tokens != float('inf'):
        optional_params["max_tokens"] = max_tokens
    if presence_penalty != 0:
        optional_params["presence_penalty"] = presence_penalty
    if frequency_penalty != 0:
        optional_params["frequency_penalty"] = frequency_penalty
    if logit_bias != {}:
        optional_params["logit_bias"] = logit_bias
    if user != "":
        optional_params["user"] = user
    return optional_params
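
# Illustrative sketch of the helper's behavior:
#   get_optional_params(temperature=0.7, stream=True)
#   -> {"temperature": 0.7, "stream": True}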


####### COMPLETION ENDPOINTS ################
#############################################
@client
@func_set_timeout(180, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
def completion(
    model, messages, # required params
    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
    functions=[], function_call="", # optional params
    temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
    presence_penalty=0, frequency_penalty=0, logit_bias={}, user="",
    # Optional liteLLM function params
    *, forceTimeout=60, azure=False, logger_fn=None, verbose=False
):
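    """Route a completion request to OpenAI, Azure OpenAI, Replicate, Anthropic, or Cohere and return an OpenAI-style response."""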
    try:
        # check if user passed in any of the OpenAI optional params
        optional_params = get_optional_params(
            functions=functions, function_call=function_call,
            temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
            presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
        )
        if azure:
            # azure configs
            openai.api_type = "azure"
            openai.api_base = os.environ.get("AZURE_API_BASE")
            openai.api_version = os.environ.get("AZURE_API_VERSION")
            openai.api_key = os.environ.get("AZURE_API_KEY")
            ## LOGGING
            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
            ## COMPLETION CALL
            response = openai.ChatCompletion.create(
                engine=model, # Azure selects the deployment via `engine`
                messages=messages,
                **optional_params
            )
        elif model in litellm.open_ai_chat_completion_models:
            openai.api_type = "openai"
            openai.api_base = "https://api.openai.com/v1"
            openai.api_version = None
            openai.api_key = os.environ.get("OPENAI_API_KEY")
            ## LOGGING
            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
            ## COMPLETION CALL
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                **optional_params
            )
        elif model in litellm.open_ai_text_completion_models:
            openai.api_type = "openai"
            openai.api_base = "https://api.openai.com/v1"
            openai.api_version = None
            openai.api_key = os.environ.get("OPENAI_API_KEY")
            # flatten the chat messages into a single text prompt
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
            ## COMPLETION CALL
            response = openai.Completion.create(
                model=model,
                prompt=prompt
            )
        elif "replicate" in model:
            # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
            # checking in case user set it to REPLICATE_API_KEY instead
            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
                replicate_api_token = os.environ.get("REPLICATE_API_KEY")
                os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
            prompt = " ".join([message["content"] for message in messages])
            input = {"prompt": prompt}
            if max_tokens != float('inf'):
                input["max_length"] = max_tokens # for t5 models
                input["max_new_tokens"] = max_tokens # for llama2 models
            ## LOGGING
            logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
            ## COMPLETION CALL
            output = replicate.run(model, input=input)
            # replicate returns the output as an iterator of chunks; concatenate into one string
            response = ""
            for item in output:
                response += item
            # reshape into an OpenAI-style response
            new_response = {
                "choices": [
                    {
                        "finish_reason": "stop",
                        "index": 0,
                        "message": {
                            "content": response,
                            "role": "assistant"
                        }
                    }
                ]
            }
            response = new_response
        elif model in litellm.anthropic_models:
            # anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
            # build the Human/Assistant prompt format the Anthropic completions API expects
            prompt = f"{HUMAN_PROMPT}"
            for message in messages:
                if "role" in message:
                    if message["role"] == "user":
                        prompt += f"{HUMAN_PROMPT}{message['content']}"
                    else:
                        prompt += f"{AI_PROMPT}{message['content']}"
                else:
                    prompt += f"{HUMAN_PROMPT}{message['content']}"
            prompt += f"{AI_PROMPT}"
            anthropic = Anthropic()
            # check if user passed in max_tokens != float('inf')
            if max_tokens != float('inf'):
                max_tokens_to_sample = max_tokens
            else:
                max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
            ## LOGGING
            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
            ## COMPLETION CALL
            completion = anthropic.completions.create(
                model=model,
                prompt=prompt,
                max_tokens_to_sample=max_tokens_to_sample
            )
            # reshape into an OpenAI-style response
            new_response = {
                "choices": [
                    {
                        "finish_reason": "stop",
                        "index": 0,
                        "message": {
                            "content": completion.completion,
                            "role": "assistant"
                        }
                    }
                ]
            }
            print_verbose(f"new response: {new_response}")
            response = new_response
        elif model in litellm.cohere_models:
            cohere_key = os.environ.get("COHERE_API_KEY")
            co = cohere.Client(cohere_key)
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
            ## COMPLETION CALL
            response = co.generate(
                model=model,
                prompt=prompt
            )
            # reshape the first generation into an OpenAI-style response
            new_response = {
                "choices": [
                    {
                        "finish_reason": "stop",
                        "index": 0,
                        "message": {
                            "content": response[0].text,
                            "role": "assistant"
                        }
                    }
                ],
            }
            response = new_response
        else:
            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
            args = locals()
            raise ValueError(f"No valid completion model args passed in - {args}")
        return response
    except Exception as e:
        # log the original exception
        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
        ## Map to OpenAI Exception
        raise exception_type(model=model, original_exception=e)
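
# Example usage (a sketch; assumes OPENAI_API_KEY is set and the model name is in
# litellm.open_ai_chat_completion_models, e.g. "gpt-3.5-turbo"):
#   response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey!"}])
#   print(response["choices"][0]["message"]["content"])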


### EMBEDDING ENDPOINTS ####################
@client
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
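    """Create embeddings via OpenAI or Azure OpenAI and return the raw OpenAI-style response."""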
    response = None
    if azure:
        # azure configs
        openai.api_type = "azure"
        openai.api_base = os.environ.get("AZURE_API_BASE")
        openai.api_version = os.environ.get("AZURE_API_VERSION")
        openai.api_key = os.environ.get("AZURE_API_KEY")
        ## LOGGING
        logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
        ## EMBEDDING CALL
        response = openai.Embedding.create(input=input, engine=model)
        print_verbose(f"response_value: {str(response)[:50]}")
    elif model in litellm.open_ai_embedding_models:
        openai.api_type = "openai"
        openai.api_base = "https://api.openai.com/v1"
        openai.api_version = None
        openai.api_key = os.environ.get("OPENAI_API_KEY")
        ## LOGGING
        logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
        ## EMBEDDING CALL
        response = openai.Embedding.create(input=input, model=model)
        print_verbose(f"response_value: {str(response)[:50]}")
    else:
        logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
        args = locals()
        raise ValueError(f"No valid embedding model args passed in - {args}")
    return response
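
# Example usage (a sketch; assumes OPENAI_API_KEY is set and the model name is in
# litellm.open_ai_embedding_models, e.g. "text-embedding-ada-002"):
#   response = embedding(model="text-embedding-ada-002", input=["good morning"])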


####### HELPER FUNCTIONS ################
## Set verbose to true -> litellm.set_verbose = True
def print_verbose(print_statement):
    if litellm.set_verbose:
        print(f"LiteLLM: {print_statement}")
        if random.random() <= 0.3:
            print("Get help - https://discord.com/invite/wuPM9dRgDw")