remove init AnthropicClient for completion calls

ishaan-jaff 2023-09-04 09:34:15 -07:00
parent 2dc1c35a05
commit bc065f08df
2 changed files with 119 additions and 136 deletions

litellm/llms/anthropic.py

@@ -1,16 +1,15 @@
-import os, json
+import os
+import json
 from enum import Enum
 import requests
 import time
 from typing import Callable
 from litellm.utils import ModelResponse
 
 class AnthropicConstants(Enum):
     HUMAN_PROMPT = "\n\nHuman:"
     AI_PROMPT = "\n\nAssistant:"
 
 class AnthropicError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
@@ -19,45 +18,33 @@ class AnthropicError(Exception):
             self.message
         )  # Call the base class constructor with the parameters it needs
 
-class AnthropicLLM:
-    def __init__(
-        self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None
-    ):
-        self.encoding = encoding
-        self.default_max_tokens_to_sample = default_max_tokens_to_sample
-        self.completion_url = "https://api.anthropic.com/v1/complete"
-        self.api_key = api_key
-        self.logging_obj = logging_obj
-        self.validate_environment(api_key=api_key)
-
-    def validate_environment(
-        self, api_key
-    ):  # set up the environment required to run the model
-        # set the api key
-        if self.api_key == None:
+# makes headers for API call
+def validate_environment(api_key):
+    if api_key is None:
         raise ValueError(
             "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
         )
-        self.api_key = api_key
-        self.headers = {
+    headers = {
         "accept": "application/json",
         "anthropic-version": "2023-06-01",
         "content-type": "application/json",
-        "x-api-key": self.api_key,
+        "x-api-key": api_key,
     }
+    return headers
 
-    def completion(
-        self,
+def completion(
     model: str,
     messages: list,
     model_response: ModelResponse,
     print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
     optional_params=None,
     litellm_params=None,
     logger_fn=None,
-    ):  # logic for parsing in - calling - parsing out model completion calls
-        model = model
+):
+    headers = validate_environment(api_key)
     prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
     for message in messages:
         if "role" in message:
@@ -72,12 +59,10 @@ class AnthropicLLM:
         else:
             prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
     prompt += f"{AnthropicConstants.AI_PROMPT.value}"
-    if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
-        "inf"
-    ):
+    if "max_tokens" in optional_params and optional_params["max_tokens"] != float("inf"):
         max_tokens = optional_params["max_tokens"]
     else:
-        max_tokens = self.default_max_tokens_to_sample
+        max_tokens = 256  # required anthropic param, default to 256 if user does not provide an input
     data = {
         "model": model,
         "prompt": prompt,
@@ -86,28 +71,29 @@ class AnthropicLLM:
     }
     ## LOGGING
-    self.logging_obj.pre_call(
+    logging_obj.pre_call(
         input=prompt,
-        api_key=self.api_key,
+        api_key=api_key,
         additional_args={"complete_input_dict": data},
     )
     ## COMPLETION CALL
     if "stream" in optional_params and optional_params["stream"] == True:
         response = requests.post(
-            self.completion_url,
-            headers=self.headers,
+            "https://api.anthropic.com/v1/complete",
+            headers=headers,
             data=json.dumps(data),
             stream=optional_params["stream"],
         )
         return response.iter_lines()
     else:
         response = requests.post(
-            self.completion_url, headers=self.headers, data=json.dumps(data)
+            "https://api.anthropic.com/v1/complete", headers=headers, data=json.dumps(data)
         )
         ## LOGGING
-        self.logging_obj.post_call(
+        logging_obj.post_call(
             input=prompt,
-            api_key=self.api_key,
+            api_key=api_key,
             original_response=response.text,
             additional_args={"complete_input_dict": data},
         )
@@ -116,23 +102,25 @@ class AnthropicLLM:
         try:
             completion_response = response.json()
         except:
-            raise AnthropicError(message=response.text, status_code=response.status_code)
+            raise AnthropicError(
+                message=response.text, status_code=response.status_code
+            )
         if "error" in completion_response:
             raise AnthropicError(
                 message=str(completion_response["error"]),
                 status_code=response.status_code,
             )
         else:
-            model_response["choices"][0]["message"][
-                "content"
-            ] = completion_response["completion"]
+            model_response["choices"][0]["message"]["content"] = completion_response[
+                "completion"
+            ]
         ## CALCULATING USAGE
         prompt_tokens = len(
-            self.encoding.encode(prompt)
+            encoding.encode(prompt)
         )  ##[TODO] use the anthropic tokenizer here
         completion_tokens = len(
-            self.encoding.encode(model_response["choices"][0]["message"]["content"])
+            encoding.encode(model_response["choices"][0]["message"]["content"])
         )  ##[TODO] use the anthropic tokenizer here
         model_response["created"] = time.time()
@@ -144,7 +132,6 @@ class AnthropicLLM:
         }
     return model_response
 
-    def embedding(
-        self,
-    ):  # logic for parsing in - calling - parsing out model embedding calls
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
     pass
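
The completion() shown above flattens litellm's chat messages into Anthropic's single text-completion prompt using the HUMAN_PROMPT and AI_PROMPT constants. The hunks truncate the per-role branches of the message loop, so the sketch below is partly an assumption: mapping "assistant" to the Assistant prefix is inferred, while the prompt seeding, the else-branch, and the trailing AI_PROMPT are taken from the visible lines.

# Hedged sketch of the prompt flattening inside completion() above.
# The per-role branches are cut off in the hunk; treating "assistant"
# as an Assistant turn is an assumption, the rest mirrors the diff.
HUMAN_PROMPT = "\n\nHuman:"   # AnthropicConstants.HUMAN_PROMPT
AI_PROMPT = "\n\nAssistant:"  # AnthropicConstants.AI_PROMPT

def build_prompt(messages: list) -> str:
    prompt = HUMAN_PROMPT  # completion() seeds the prompt this way
    for message in messages:
        if message.get("role") == "assistant":
            prompt += f"{AI_PROMPT}{message['content']}"
        else:
            # visible else-branch: any non-assistant message becomes a Human turn
            prompt += f"{HUMAN_PROMPT}{message['content']}"
    return prompt + AI_PROMPT  # trailing Assistant turn for the model to complete

# build_prompt([{"role": "user", "content": "Hi"}])
# -> "\n\nHuman:\n\nHuman:Hi\n\nAssistant:"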

litellm/main.py

@@ -19,7 +19,7 @@ from litellm.utils import (
     read_config_args,
     completion_with_fallbacks,
 )
-from .llms.anthropic import AnthropicLLM
+from .llms import anthropic
 from .llms.huggingface_restapi import HuggingfaceRestAPILLM
 from .llms.baseten import BasetenLLM
 from .llms.ai21 import AI21LLM
@@ -61,7 +61,6 @@ async def acompletion(*args, **kwargs):
-
 @client
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
 @timeout(  # type: ignore
     600
 )  ## set timeouts, in case calls hang (e.g. Azure) - default is 600s, override with `force_timeout`
@@ -79,7 +78,6 @@ def completion(
     max_tokens=float("inf"),
     presence_penalty=0,
     frequency_penalty=0,
-    num_beams=1,
     logit_bias={},
     user="",
     deployment_id=None,
@@ -89,6 +87,7 @@ def completion(
     api_key=None,
     api_version=None,
     force_timeout=600,
+    num_beams=1,
     logger_fn=None,
     verbose=False,
     azure=False,
@@ -407,13 +406,7 @@ def completion(
         anthropic_key = (
             api_key or litellm.anthropic_key or os.environ.get("ANTHROPIC_API_KEY")
         )
-        anthropic_client = AnthropicLLM(
-            encoding=encoding,
-            default_max_tokens_to_sample=litellm.max_tokens,
-            api_key=anthropic_key,
-            logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements
-        )
-        model_response = anthropic_client.completion(
+        model_response = anthropic.completion(
             model=model,
             messages=messages,
             model_response=model_response,
@@ -421,6 +414,9 @@ def completion(
             optional_params=optional_params,
             litellm_params=litellm_params,
             logger_fn=logger_fn,
+            encoding=encoding,  # for calculating input/output tokens
+            api_key=anthropic_key,
+            logging_obj=logging,
         )
         if "stream" in optional_params and optional_params["stream"] == True:
             # don't try to access stream object,
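
Taken together, the two hunks above replace the stateful AnthropicLLM instance with one stateless call: main.completion() now forwards encoding, api_key, and logging_obj as keyword arguments on every request. A minimal sketch of the new call path, assuming a tiktoken encoding and a stub logger as stand-ins for what litellm actually injects (its real logging object provides pre_call/post_call, per the anthropic.py diff; ModelResponse() is assumed to pre-populate choices[0].message, as the diff's assignment implies):

# Hedged sketch of calling the refactored module-level API.
import os
import tiktoken
from litellm.llms import anthropic
from litellm.utils import ModelResponse

class StubLogging:  # hypothetical stand-in for litellm's logging object
    def pre_call(self, input, api_key, additional_args=None):
        pass
    def post_call(self, input, api_key, original_response, additional_args=None):
        pass

model_response = anthropic.completion(
    model="claude-instant-1",
    messages=[{"role": "user", "content": "Hello"}],
    model_response=ModelResponse(),
    print_verbose=print,
    optional_params={},  # no "stream"/"max_tokens" -> non-streaming, max_tokens=256
    litellm_params={},
    logger_fn=None,
    encoding=tiktoken.get_encoding("cl100k_base"),  # for token counting
    api_key=os.environ["ANTHROPIC_API_KEY"],
    logging_obj=StubLogging(),
)
print(model_response["choices"][0]["message"]["content"])

Because the module functions hold no state, there is no per-provider client object left for main.completion() to construct before each call.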