Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)
fix(openai.py): adding support for exception mapping for openai-compatible apis via http calls
This commit is contained in:
parent b455bdfff1 · commit ec5e7aa4a9
8 changed files with 4943 additions and 32 deletions
Binary file not shown.
Binary file not shown.
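The net effect of this commit: requests sent to an OpenAI-compatible api_base are tagged as the "custom_openai" provider, issued over raw HTTP, and non-200 responses are mapped back to litellm's typed exceptions. A minimal usage sketch (illustrative only, modeled on the test added in this commit; the placeholder key and the exception handling shown here are assumptions, not part of the diff):

    import os
    import litellm
    from litellm import completion

    os.environ["PERPLEXITYAI_API_KEY"] = "pplx-..."  # placeholder, set to a real key

    try:
        response = completion(
            model="mistral-7b-instruct",
            messages=[{"role": "user", "content": "Hey"}],
            api_base="https://api.perplexity.ai",  # detected as an openai-compatible endpoint
        )
        print(response)
    except litellm.exceptions.AuthenticationError as e:
        # a 401 from the endpoint now surfaces as a typed litellm exception
        print(f"auth error: {e}")
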
@@ -246,30 +246,53 @@ class OpenAIChatCompletion(BaseLLM):
                 logger_fn=None):
         super().completion()
         headers = self.validate_environment(api_key=api_key)
-        data = {
-            "messages": messages,
-            **optional_params
-        }
-        if "stream" in optional_params and optional_params["stream"] == True:
-            response = self._client_session.post(
-                url=f"{api_base}/chat/completions",
-                json=data,
-                headers=headers,
-                stream=optional_params["stream"]
-            )
-            if response.status_code != 200:
-                raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-            ## RESPONSE OBJECT
-            return response.iter_lines()
-        else:
-            response = self._client_session.post(
-                url=f"{api_base}/chat/completions",
-                json=data,
-                headers=headers,
-            )
-            if response.status_code != 200:
-                raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-            ## RESPONSE OBJECT
-            return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
+
+        for _ in range(2): # if call fails due to alternating messages, retry with reformatted message
+            data = {
+                "model": model,
+                "messages": messages,
+                **optional_params
+            }
+            try:
+                if "stream" in optional_params and optional_params["stream"] == True:
+                    response = self._client_session.post(
+                        url=f"{api_base}/chat/completions",
+                        json=data,
+                        headers=headers,
+                        stream=optional_params["stream"]
+                    )
+                    if response.status_code != 200:
+                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
+
+                    ## RESPONSE OBJECT
+                    return response.iter_lines()
+                else:
+                    response = self._client_session.post(
+                        url=f"{api_base}/chat/completions",
+                        json=data,
+                        headers=headers,
+                    )
+                    if response.status_code != 200:
+                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
+
+                    ## RESPONSE OBJECT
+                    return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
+            except Exception as e:
+                if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e):
+                    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
+                    new_messages = []
+                    for i in range(len(messages)-1):
+                        new_messages.append(messages[i])
+                        if messages[i]["role"] == messages[i+1]["role"]:
+                            if messages[i]["role"] == "user":
+                                new_messages.append({"role": "assistant", "content": ""})
+                            else:
+                                new_messages.append({"role": "user", "content": ""})
+                    new_messages.append(messages[-1])
+                    messages = new_messages
+                elif "Last message must have role `user`" in str(e):
+                    new_messages = messages
+                    new_messages.append({"role": "user", "content": ""})
+                    messages = new_messages
+                else:
+                    raise e
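For readers skimming the hunk above, a small standalone illustration of what the role-alternation fallback does when the provider rejects two consecutive messages with the same role (ensure_alternating_roles is a made-up helper name for this sketch, not part of the commit):

    def ensure_alternating_roles(messages):
        # insert blank turns so user/assistant roles strictly alternate
        new_messages = []
        for i in range(len(messages) - 1):
            new_messages.append(messages[i])
            if messages[i]["role"] == messages[i + 1]["role"]:
                filler_role = "assistant" if messages[i]["role"] == "user" else "user"
                new_messages.append({"role": filler_role, "content": ""})
        new_messages.append(messages[-1])
        return new_messages

    messages = [
        {"role": "user", "content": "Hey"},
        {"role": "user", "content": "Hey"},
    ]
    print(ensure_alternating_roles(messages))
    # [{'role': 'user', 'content': 'Hey'}, {'role': 'assistant', 'content': ''}, {'role': 'user', 'content': 'Hey'}]
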
@@ -445,7 +445,7 @@ def completion(
             raise e

         if "stream" in optional_params and optional_params["stream"] == True:
-            response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
+            response = CustomStreamWrapper(response, model, custom_llm_provider=custom_llm_provider, logging_obj=logging)
             return response
         ## LOGGING
         logging.post_call(
File diff suppressed because it is too large
@@ -332,7 +332,6 @@ def logger(
     end_time=None # start/end time
 ):
     log_event_type = kwargs['log_event_type']
-    print(f"REACHES LOGGER: {log_event_type}")
     try:
         if log_event_type == 'pre_api_call':
             inference_params = copy.deepcopy(kwargs)
@@ -355,7 +354,6 @@ def logger(
                 with open(log_file, 'w') as f:
                     json.dump(existing_data, f, indent=2)
         elif log_event_type == 'post_api_call':
-            print(f"post api call kwargs: {kwargs}")
             if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
                 inference_params = copy.deepcopy(kwargs)
                 timestamp = inference_params.pop('start_time')
@@ -438,7 +436,6 @@ async def completion(request: Request):
 @router.post("/chat/completions")
 async def chat_completion(request: Request):
     data = await request.json()
-    print(f"data passed in: {data}")
     response = litellm_completion(data, type="chat_completion")
     return response

@@ -108,6 +108,28 @@ def test_completion_with_litellm_call_id():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+def test_completion_perplexity_api():
+    try:
+        litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="mistral-7b-instruct",
+            messages=messages,
+            api_base="https://api.perplexity.ai")
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_perplexity_api()
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
 #     try:
@@ -1419,7 +1419,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
     if api_base:
         for endpoint in litellm.openai_compatible_endpoints:
             if endpoint in api_base:
-                custom_llm_provider = "openai"
+                custom_llm_provider = "custom_openai"
+                if endpoint == "api.perplexity.ai":
+                    litellm.api_key = os.getenv("PERPLEXITYAI_API_KEY")
                 return model, custom_llm_provider

     # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.)
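A condensed sketch of the routing change above (illustrative; pick_provider is a made-up name, and it assumes "api.perplexity.ai" is listed in litellm.openai_compatible_endpoints, as the diff implies):

    import os
    import litellm

    def pick_provider(model, api_base=None, custom_llm_provider=None):
        # any api_base that matches a known openai-compatible endpoint is routed
        # through the "custom_openai" HTTP code path
        if api_base:
            for endpoint in litellm.openai_compatible_endpoints:
                if endpoint in api_base:
                    custom_llm_provider = "custom_openai"
                    if endpoint == "api.perplexity.ai":
                        litellm.api_key = os.getenv("PERPLEXITYAI_API_KEY")
                    return model, custom_llm_provider
        return model, custom_llm_provider

    print(pick_provider("mistral-7b-instruct", api_base="https://api.perplexity.ai"))
    # ('mistral-7b-instruct', 'custom_openai')
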
@@ -2936,6 +2938,45 @@ def exception_type(
         elif custom_llm_provider == "ollama":
             if "no attribute 'async_get_ollama_response_stream" in error_str:
                 raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'")
+        elif custom_llm_provider == "custom_openai":
+            if hasattr(original_exception, "status_code"):
+                exception_mapping_worked = True
+                if original_exception.status_code == 401:
+                    exception_mapping_worked = True
+                    raise AuthenticationError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        llm_provider="custom_openai",
+                        model=model
+                    )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai"
+                    )
+                if original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise InvalidRequestError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai",
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai",
+                    )
+                else:
+                    exception_mapping_worked = True
+                    raise APIError(
+                        status_code=original_exception.status_code,
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        llm_provider="custom_openai",
+                        model=model
+                    )
         exception_mapping_worked = True
         raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
     except Exception as e:
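The mapping pattern above, reduced to a hedged sketch (map_custom_openai_error is a hypothetical helper; the 408 and 422 branches are omitted here, and this assumes the exception classes are importable from the top-level litellm package the way utils.py uses them):

    from litellm import AuthenticationError, RateLimitError, APIError

    def map_custom_openai_error(original_exception, model):
        # re-raise an HTTP error carrying a status_code as the matching typed exception
        status = getattr(original_exception, "status_code", None)
        message = f"CustomOpenAIException - {getattr(original_exception, 'message', original_exception)}"
        if status == 401:
            raise AuthenticationError(message=message, llm_provider="custom_openai", model=model)
        if status == 429:
            raise RateLimitError(message=message, llm_provider="custom_openai", model=model)
        raise APIError(status_code=status or 500, message=message, llm_provider="custom_openai", model=model)
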
@@ -3205,6 +3246,30 @@ class CustomStreamWrapper:
         except:
             raise ValueError(f"Unable to parse response. Original response: {chunk}")

+    def handle_custom_openai_chat_completion_chunk(self, chunk):
+        try:
+            str_line = chunk.decode("utf-8")  # Convert bytes to string
+            text = ""
+            is_finished = False
+            finish_reason = None
+            if str_line.startswith("data:"):
+                data_json = json.loads(str_line[5:])
+                print(f"delta content: {data_json['choices'][0]['delta']}")
+                text = data_json["choices"][0]["delta"].get("content", "")
+                if data_json["choices"][0].get("finish_reason", None):
+                    is_finished = True
+                    finish_reason = data_json["choices"][0]["finish_reason"]
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+            elif "error" in str_line:
+                raise ValueError(f"Unable to parse response. Original response: {str_line}")
+            else:
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+
+        except:
+            traceback.print_exc()
+            pass
+
     def handle_openai_text_completion_chunk(self, chunk):
         try:
             return chunk["choices"][0]["text"]
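An illustrative input/output pair for the chunk parser above, assuming the endpoint streams OpenAI-style server-sent events ("data: {...}" lines); the sample payload is made up:

    import json

    raw_chunk = b'data: {"choices": [{"delta": {"content": "Hello"}, "finish_reason": null}]}'
    str_line = raw_chunk.decode("utf-8")
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        # same extraction the parser performs
        print(data_json["choices"][0]["delta"].get("content", ""))  # -> Hello
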
@@ -3401,6 +3466,13 @@ class CustomStreamWrapper:
                 if "error" in chunk:
                     exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=chunk["error"])
                 completion_obj = chunk
+            elif self.custom_llm_provider == "custom_openai":
+                chunk = next(self.completion_stream)
+                response_obj = self.handle_custom_openai_chat_completion_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
                 model_response = chunk
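Putting the streaming pieces together, a hypothetical end-to-end call (not part of the diff; the model name and endpoint mirror the new test, and the iteration shown is only a sketch):

    from litellm import completion

    response = completion(
        model="mistral-7b-instruct",
        messages=[{"role": "user", "content": "Hey"}],
        api_base="https://api.perplexity.ai",
        stream=True,
    )
    # each raw line is parsed by handle_custom_openai_chat_completion_chunk under the hood
    for chunk in response:
        print(chunk)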