mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
adding anthropic llm class - handles sync + stream
This commit is contained in:
parent
e6da2f8bf7
commit
bc767cc42a
27 changed files with 219 additions and 693 deletions
104
litellm/main.py
104
litellm/main.py
|
@ -4,11 +4,12 @@ from functools import partial
|
|||
import dotenv, traceback, random, asyncio, time
|
||||
from copy import deepcopy
|
||||
import litellm
|
||||
from litellm import client, logging, exception_type, timeout, get_optional_params
|
||||
from litellm import client, logging, exception_type, timeout, get_optional_params, get_litellm_params
|
||||
from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args
|
||||
from .llms.anthropic import AnthropicLLM
|
||||
import tiktoken
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args
|
||||
####### ENVIRONMENT VARIABLES ###################
|
||||
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||
new_response = {
|
||||
|
@ -38,14 +39,13 @@ async def acompletion(*args, **kwargs):
|
|||
# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
|
||||
@timeout(600) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
|
||||
def completion(
|
||||
messages, model="gpt-3.5-turbo",# required params
|
||||
model, messages,# required params
|
||||
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
|
||||
functions=[], function_call="", # optional params
|
||||
temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
|
||||
presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
|
||||
# Optional liteLLM function params
|
||||
*, return_async=False, api_key=None, force_timeout=600, azure=False, logger_fn=None, verbose=False,
|
||||
hugging_face = False, replicate=False,together_ai = False, custom_llm_provider=None, custom_api_base=None
|
||||
*, return_async=False, api_key=None, force_timeout=600, logger_fn=None, verbose=False, custom_llm_provider=None, custom_api_base=None
|
||||
):
|
||||
try:
|
||||
global new_response
|
||||
|
@ -57,9 +57,15 @@ def completion(
|
|||
temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
|
||||
presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id,
|
||||
# params to identify the model
|
||||
model=model, replicate=replicate, hugging_face=hugging_face, together_ai=together_ai
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
if azure == True or custom_llm_provider == "azure": # [TODO]: remove azure=True flag, move to 'custom_llm_provider' approach
|
||||
# For logging - save the values of the litellm-specific params passed in
|
||||
litellm_params = get_litellm_params(
|
||||
return_async=return_async, api_key=api_key, force_timeout=force_timeout,
|
||||
logger_fn=logger_fn, verbose=verbose, custom_llm_provider=custom_llm_provider,
|
||||
custom_api_base=custom_api_base)
|
||||
|
||||
if custom_llm_provider == "azure":
|
||||
# azure configs
|
||||
openai.api_type = "azure"
|
||||
openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE")
|
||||
|
@ -72,7 +78,7 @@ def completion(
|
|||
else:
|
||||
openai.api_key = get_secret("AZURE_API_KEY")
|
||||
## LOGGING
|
||||
logging(model=model, input=messages, additional_args=optional_params, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=messages, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -102,7 +108,7 @@ def completion(
|
|||
else:
|
||||
openai.api_key = get_secret("OPENAI_API_KEY")
|
||||
## LOGGING
|
||||
logging(model=model, input=messages, additional_args=args, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=messages, additional_args=args, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -131,7 +137,7 @@ def completion(
|
|||
openai.organization = litellm.organization
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, additional_args=optional_params, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.Completion.create(
|
||||
|
@ -146,14 +152,14 @@ def completion(
|
|||
)
|
||||
completion_response = response["choices"]["text"]
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
model_response["created"] = response["created"]
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = response["usage"]
|
||||
response = model_response
|
||||
elif "replicate" in model or replicate == True or custom_llm_provider == "replicate":
|
||||
elif "replicate" in model or custom_llm_provider == "replicate":
|
||||
# import replicate/if it fails then pip install replicate
|
||||
install_and_import("replicate")
|
||||
import replicate
|
||||
|
@ -168,11 +174,11 @@ def completion(
|
|||
os.environ["REPLICATE_API_TOKEN"] = litellm.replicate_key
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
input = {"prompt": prompt}
|
||||
if max_tokens != float('inf'):
|
||||
if "max_tokens" in optional_params:
|
||||
input["max_length"] = max_tokens # for t5 models
|
||||
input["max_new_tokens"] = max_tokens # for llama2 models
|
||||
## LOGGING
|
||||
logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||
logging(model=model, input=input, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
output = replicate.run(
|
||||
model,
|
||||
|
@ -187,7 +193,7 @@ def completion(
|
|||
response += item
|
||||
completion_response = response
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(encoding.encode(completion_response))
|
||||
## RESPONSE OBJECT
|
||||
|
@ -201,59 +207,13 @@ def completion(
|
|||
}
|
||||
response = model_response
|
||||
elif model in litellm.anthropic_models:
|
||||
# import anthropic/if it fails then pip install anthropic
|
||||
install_and_import("anthropic")
|
||||
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||
|
||||
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
|
||||
if api_key:
|
||||
os.environ["ANTHROPIC_API_KEY"] = api_key
|
||||
elif litellm.anthropic_key:
|
||||
os.environ["ANTHROPIC_API_KEY"] = litellm.anthropic_key
|
||||
prompt = f"{HUMAN_PROMPT}"
|
||||
for message in messages:
|
||||
if "role" in message:
|
||||
if message["role"] == "user":
|
||||
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||
else:
|
||||
prompt += f"{AI_PROMPT}{message['content']}"
|
||||
else:
|
||||
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||
prompt += f"{AI_PROMPT}"
|
||||
anthropic = Anthropic()
|
||||
if max_tokens != float('inf'):
|
||||
max_tokens_to_sample = max_tokens
|
||||
else:
|
||||
max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
completion = anthropic.completions.create(
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
max_tokens_to_sample=max_tokens_to_sample,
|
||||
**optional_params
|
||||
)
|
||||
anthropic_key = api_key if api_key is not None else litellm.anthropic_key
|
||||
anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
|
||||
model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
|
||||
if 'stream' in optional_params and optional_params['stream'] == True:
|
||||
# don't try to access stream object,
|
||||
response = CustomStreamWrapper(completion, model)
|
||||
response = CustomStreamWrapper(model_response, model)
|
||||
return response
|
||||
|
||||
completion_response = completion.completion
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
prompt_tokens = anthropic.count_tokens(prompt)
|
||||
completion_tokens = anthropic.count_tokens(completion_response)
|
||||
## RESPONSE OBJECT
|
||||
print_verbose(f"raw model_response: {model_response}")
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens
|
||||
}
|
||||
response = model_response
|
||||
|
||||
elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
|
||||
|
@ -270,7 +230,7 @@ def completion(
|
|||
else:
|
||||
openai.api_key = get_secret("OPENROUTER_API_KEY")
|
||||
## LOGGING
|
||||
logging(model=model, input=messages, additional_args=optional_params, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=messages, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -311,7 +271,7 @@ def completion(
|
|||
co = cohere.Client(cohere_key)
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
## COMPLETION CALL
|
||||
response = co.generate(
|
||||
model=model,
|
||||
|
@ -364,7 +324,7 @@ def completion(
|
|||
"total_tokens": prompt_tokens + completion_tokens
|
||||
}
|
||||
response = model_response
|
||||
elif together_ai == True or custom_llm_provider == "together_ai":
|
||||
elif custom_llm_provider == "together_ai":
|
||||
import requests
|
||||
TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN")
|
||||
headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
|
||||
|
@ -410,7 +370,7 @@ def completion(
|
|||
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
|
||||
chat_model = ChatModel.from_pretrained(model)
|
||||
|
||||
|
@ -419,7 +379,7 @@ def completion(
|
|||
completion_response = chat.send_message(prompt, **optional_params)
|
||||
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
|
@ -428,13 +388,13 @@ def completion(
|
|||
response = model_response
|
||||
else:
|
||||
## LOGGING
|
||||
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||
logging(model=model, input=messages, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
|
||||
args = locals()
|
||||
raise ValueError(f"Invalid completion model args passed in. Check your input - {args}")
|
||||
return response
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
|
||||
logging(model=model, input=messages, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
|
||||
## Map to OpenAI Exception
|
||||
raise exception_type(model=model, original_exception=e)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue