fix(azure.py-+-proxy_server.py): fix function calling response object + support router on proxy

Krrish Dholakia 2023-11-15 13:15:09 -08:00
parent 8f8b35eb34
commit 8c104e9c6a
8 changed files with 54 additions and 59 deletions

.gitignore
View file

@@ -14,3 +14,4 @@ litellm/proxy/api_log.json
 .idea/
 router_config.yaml
 litellm_server/config.yaml
+litellm/proxy/_secret_config.yaml

View file

@@ -1,8 +1,6 @@
 import Image from '@theme/IdealImage';
-# Reliability - Fallbacks, Azure Deployments, etc.
-## Manage Multiple Deployments
+# Manage Multiple Deployments
 Use this if you're trying to load-balance across multiple deployments (e.g. Azure/OpenAI).

View file

@@ -110,12 +110,13 @@ class APIError(APIError): # type: ignore
 # raised if an invalid request (not get, delete, put, post) is made
 class APIConnectionError(APIConnectionError): # type: ignore
-    def __init__(self, message, llm_provider, model):
+    def __init__(self, message, llm_provider, model, request: httpx.Request):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         super().__init__(
-            self.message
+            message=self.message,
+            request=request
         )
 class OpenAIError(OpenAIError): # type: ignore
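The extra request parameter exists because the openai v1 exception classes being subclassed here expect an httpx.Request in their constructor. A minimal sketch of raising the updated class, assuming it is importable as litellm.exceptions.APIConnectionError (the file path is not shown in this diff) and using placeholder request details:

    import httpx
    from litellm.exceptions import APIConnectionError

    # Placeholder request; real call sites pass the request tied to the failed call.
    request = httpx.Request(method="POST", url="https://example.invalid/v1/chat/completions")

    raise APIConnectionError(
        message="connection dropped before a response was received",
        llm_provider="azure",
        model="gpt-3.5-turbo",
        request=request,
    )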

View file

@@ -195,7 +195,7 @@ class AzureChatCompletion(BaseLLM):
                     method="POST"
                 ) as response:
                     if response.status_code != 200:
-                        raise AzureOpenAIError(status_code=response.status_code, message=response.text)
+                        raise AzureOpenAIError(status_code=response.status_code, message="An error occurred while streaming")
                     completion_stream = response.iter_lines()
                     streamwrapper = CustomStreamWrapper(completion_stream=completion_stream, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
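The switch away from response.text is presumably because this block is handling a streaming response: with httpx, a streamed body is not loaded eagerly, and touching .text before reading it raises httpx.ResponseNotRead. A small illustration of that behavior, assuming the surrounding (not shown) code uses httpx.Client.stream and a placeholder URL:

    import httpx

    with httpx.Client() as client:
        # client.stream() defers reading the body until it is iterated or read()
        with client.stream("POST", "https://example.invalid/stream") as response:
            if response.status_code != 200:
                response.read()             # load the body first...
                error_body = response.text  # ...otherwise httpx raises ResponseNotRead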

View file

@@ -2,7 +2,7 @@ import os
 import json
 from enum import Enum
 import requests
-import time
+import time, httpx
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -11,6 +11,8 @@ class VLLMError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.request = httpx.Request(method="POST", url="http://0.0.0.0:8000")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
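The placeholder httpx.Request/httpx.Response give vLLM errors the attributes that the openai v1 exceptions raised by exception_type() (see the utils.py hunks further down) expect; the 0.0.0.0:8000 URL looks like a stand-in for a local vLLM server rather than the real request URL. A trimmed, self-contained sketch of the consumer side:

    import httpx

    class VLLMError(Exception):
        # trimmed mirror of the class in the hunk above
        def __init__(self, status_code, message):
            self.status_code = status_code
            self.message = message
            self.request = httpx.Request(method="POST", url="http://0.0.0.0:8000")
            self.response = httpx.Response(status_code=status_code, request=self.request)
            super().__init__(self.message)

    try:
        raise VLLMError(status_code=500, message="vLLM server unreachable")
    except VLLMError as original_exception:
        # exception_type() can now forward this request object downstream:
        print(original_exception.request.method, original_exception.request.url)
        print(original_exception.response.status_code)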

View file

@@ -453,25 +453,18 @@ def litellm_completion(*args, **kwargs):
         kwargs["max_tokens"] = user_max_tokens
     if user_api_base:
         kwargs["api_base"] = user_api_base
-    ## CHECK CONFIG ##
-    if llm_model_list != None:
-        llm_models = [m["model_name"] for m in llm_model_list]
-        if kwargs["model"] in llm_models:
-            for m in llm_model_list:
-                if kwargs["model"] == m["model_name"]: # if user has specified a config, this will use the config
-                    for key, value in m["litellm_params"].items():
-                        kwargs[key] = value
-                    break
-        else:
-            print_verbose("user sent model not in config, using default config model")
-            default_model = llm_model_list[0]
-            litellm_params = default_model.get('litellm_params', None)
-            for key, value in litellm_params.items():
-                kwargs[key] = value
-    if call_type == "chat_completion":
-        response = litellm.completion(*args, **kwargs)
-    elif call_type == "text_completion":
-        response = litellm.text_completion(*args, **kwargs)
+    ## ROUTE TO CORRECT ENDPOINT ##
+    router_model_names = [m["model_name"] for m in llm_model_list]
+    if llm_router is not None and kwargs["model"] in router_model_names: # model in router model list
+        if call_type == "chat_completion":
+            response = llm_router.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = llm_router.text_completion(*args, **kwargs)
+    else:
+        if call_type == "chat_completion":
+            response = litellm.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = litellm.text_completion(*args, **kwargs)
     if 'stream' in kwargs and kwargs['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     return response
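With this change the proxy checks the incoming model against the configured model_name entries and, when a router has been initialized, hands the call to it; everything else falls back to plain litellm.completion. A rough sketch of the setup this relies on, with placeholder deployment details (the code that actually builds llm_router on the proxy is not part of this hunk):

    import litellm
    from litellm import Router

    # Placeholder config: one alias ("gpt-3.5-turbo") backed by an Azure deployment.
    llm_model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": "sk-placeholder",
                "api_base": "https://my-endpoint.openai.azure.com",
            },
        }
    ]
    llm_router = Router(model_list=llm_model_list)

    def route_completion(**kwargs):
        # same decision the proxy makes above
        router_model_names = [m["model_name"] for m in llm_model_list]
        if llm_router is not None and kwargs["model"] in router_model_names:
            return llm_router.completion(**kwargs)
        return litellm.completion(**kwargs)

    # route_completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hi"}])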

View file

@@ -579,36 +579,34 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")
 # test_completion_openai_with_more_optional_params()
-# def test_completion_openai_azure_with_functions():
-#     function1 = [
-#         {
-#             "name": "get_current_weather",
-#             "description": "Get the current weather in a given location",
-#             "parameters": {
-#                 "type": "object",
-#                 "properties": {
-#                     "location": {
-#                         "type": "string",
-#                         "description": "The city and state, e.g. San Francisco, CA",
-#                     },
-#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-#                 },
-#                 "required": ["location"],
-#             },
-#         }
-#     ]
-#     try:
-#         response = completion(
-#             model="azure/chatgpt-functioncalling", messages=messages, stream=True
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#         for chunk in response:
-#             print(chunk)
-#             print(chunk["choices"][0]["finish_reason"])
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_azure_with_functions()
+def test_completion_openai_azure_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
+        response = completion(
+            model="azure/chatgpt-functioncalling", messages=messages, functions=function1
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_openai_azure_with_functions()
 def test_completion_azure():

View file

@@ -2896,7 +2896,7 @@ def convert_to_model_response_object(response_object: Optional[dict]=None, model
             raise Exception("Error in response object format")
         choice_list=[]
         for idx, choice in enumerate(response_object["choices"]):
-            message = Message(content=choice["message"]["content"], role=choice["message"]["role"], function_call=choice["message"].get("function_call", None))
+            message = Message(content=choice["message"].get("content", None), role=choice["message"]["role"], function_call=choice["message"].get("function_call", None))
             finish_reason = choice.get("finish_reason", None)
             if finish_reason == None:
                 # gpt-4 vision can return 'finish_reason' or 'finish_details'
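The switch to .get("content", None) matters for function-calling responses: when the assistant replies with a function_call, the content field can come back as null or be missing entirely, and direct indexing would raise a KeyError. An illustrative choice payload (shape follows the OpenAI/Azure chat format; exact fields vary by provider and API version):

    choice = {
        "index": 0,
        "finish_reason": "function_call",
        "message": {
            "role": "assistant",
            # no "content" key at all in some function-call responses
            "function_call": {
                "name": "get_current_weather",
                "arguments": "{\"location\": \"Boston, MA\"}",
            },
        },
    }

    content = choice["message"].get("content", None)             # None instead of a KeyError
    function_call = choice["message"].get("function_call", None)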
@@ -4018,7 +4018,8 @@ def exception_type(
                 raise APIConnectionError(
                     message=f"VLLMException - {original_exception.message}",
                     llm_provider="vllm",
-                    model=model
+                    model=model,
+                    request=original_exception.request
                 )
         elif custom_llm_provider == "azure":
             if "This model's maximum context length is" in error_str:
@@ -4093,7 +4094,8 @@ def exception_type(
             raise APIConnectionError(
                 message=f"{str(original_exception)}",
                 llm_provider=custom_llm_provider,
-                model=model
+                model=model,
+                request=original_exception.request
             )
     except Exception as e:
         # LOGGING
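Net effect for callers: the mapped APIConnectionError now satisfies the openai v1 constructor, so the caught exception carries a usable httpx.Request. A usage sketch with placeholder model and provider values:

    import litellm
    from litellm.exceptions import APIConnectionError

    try:
        litellm.completion(
            model="vllm/some-model",  # placeholder; any provider mapped above applies
            messages=[{"role": "user", "content": "hi"}],
        )
    except APIConnectionError as e:
        print(e.llm_provider, e.model)
        print(e.request.method, e.request.url)  # request attached by the changes above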