forked from phoenix/litellm-mirror
fix(azure.py-+-proxy_server.py): fix function calling response object + support router on proxy
This commit is contained in:
parent 8f8b35eb34
commit 8c104e9c6a

8 changed files with 54 additions and 59 deletions
.gitignore (vendored): 1 change
@@ -14,3 +14,4 @@ litellm/proxy/api_log.json
 .idea/
 router_config.yaml
 litellm_server/config.yaml
+litellm/proxy/_secret_config.yaml

@@ -1,8 +1,6 @@
 import Image from '@theme/IdealImage';
 
-# Reliability - Fallbacks, Azure Deployments, etc.
-
-## Manage Multiple Deployments
+# Manage Multiple Deployments
 
 Use this if you're trying to load-balance across multiple deployments (e.g. Azure/OpenAI).

@@ -110,12 +110,13 @@ class APIError(APIError): # type: ignore
 
 # raised if an invalid request (not get, delete, put, post) is made
 class APIConnectionError(APIConnectionError): # type: ignore
-    def __init__(self, message, llm_provider, model):
+    def __init__(self, message, llm_provider, model, request: httpx.Request):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         super().__init__(
-            self.message
+            message=self.message,
+            request=request
         )
 
 class OpenAIError(OpenAIError): # type: ignore

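With this change, every raise site has to hand the exception an httpx.Request so the super().__init__ call shown above can be initialized with it. A minimal sketch of a raise after this change; the URL and model name below are placeholders, not values taken from this commit:

import httpx
from litellm.exceptions import APIConnectionError

# Placeholder request object; real call sites would pass the request that actually failed.
failed_request = httpx.Request(method="POST", url="https://example-endpoint/v1/chat/completions")

raise APIConnectionError(
    message="connection to the provider failed",
    llm_provider="azure",
    model="gpt-3.5-turbo",
    request=failed_request,
)
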
@@ -195,7 +195,7 @@ class AzureChatCompletion(BaseLLM):
                 method="POST"
             ) as response:
                 if response.status_code != 200:
-                    raise AzureOpenAIError(status_code=response.status_code, message=response.text)
+                    raise AzureOpenAIError(status_code=response.status_code, message="An error occurred while streaming")
 
                 completion_stream = response.iter_lines()
                 streamwrapper = CustomStreamWrapper(completion_stream=completion_stream, model=model, custom_llm_provider="azure",logging_obj=logging_obj)

@@ -2,7 +2,7 @@ import os
 import json
 from enum import Enum
 import requests
-import time
+import time, httpx
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -11,6 +11,8 @@ class VLLMError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.request = httpx.Request(method="POST", url="http://0.0.0.0:8000")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs

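The synthetic request/response pair exists because VLLMError never reaches an httpx call of its own, yet the exception mapping later in this commit forwards original_exception.request into the new APIConnectionError signature. A small sketch of the pattern, using a stand-in class name rather than importing the real module:

import httpx

class ProviderError(Exception):  # stand-in for VLLMError; the name is illustrative
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        # Synthetic request/response so downstream mapping code can rely on these attributes.
        self.request = httpx.Request(method="POST", url="http://0.0.0.0:8000")
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(self.message)

err = ProviderError(status_code=500, message="vllm backend unavailable")
print(err.request.method, err.request.url)  # POST http://0.0.0.0:8000
print(err.response.status_code)             # 500
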
@@ -453,25 +453,18 @@ def litellm_completion(*args, **kwargs):
         kwargs["max_tokens"] = user_max_tokens
     if user_api_base:
         kwargs["api_base"] = user_api_base
-    ## CHECK CONFIG ##
-    if llm_model_list != None:
-        llm_models = [m["model_name"] for m in llm_model_list]
-        if kwargs["model"] in llm_models:
-            for m in llm_model_list:
-                if kwargs["model"] == m["model_name"]: # if user has specified a config, this will use the config
-                    for key, value in m["litellm_params"].items():
-                        kwargs[key] = value
-                    break
-        else:
-            print_verbose("user sent model not in config, using default config model")
-            default_model = llm_model_list[0]
-            litellm_params = default_model.get('litellm_params', None)
-            for key, value in litellm_params.items():
-                kwargs[key] = value
-    if call_type == "chat_completion":
-        response = litellm.completion(*args, **kwargs)
-    elif call_type == "text_completion":
-        response = litellm.text_completion(*args, **kwargs)
+    ## ROUTE TO CORRECT ENDPOINT ##
+    router_model_names = [m["model_name"] for m in llm_model_list]
+    if llm_router is not None and kwargs["model"] in router_model_names: # model in router model list
+        if call_type == "chat_completion":
+            response = llm_router.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = llm_router.text_completion(*args, **kwargs)
+    else:
+        if call_type == "chat_completion":
+            response = litellm.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = litellm.text_completion(*args, **kwargs)
     if 'stream' in kwargs and kwargs['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     return response

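For context on the llm_router used here: litellm's Router takes a model_list that maps a public model alias to one or more deployments, and proxy requests whose model name appears in that list are load-balanced by the router, while everything else falls through to plain litellm.completion as in the hunk above. A rough sketch with placeholder deployment values, not anything read from this commit's config:

from litellm import Router

# Hypothetical model list; on the proxy this would be loaded from a router config file.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # alias the proxy exposes
        "litellm_params": {
            "model": "azure/my-azure-deployment",  # placeholder deployment
            "api_key": "sk-...",                   # placeholder
            "api_base": "https://my-endpoint.openai.azure.com",
        },
    },
]

llm_router = Router(model_list=model_list)

# A request for "gpt-3.5-turbo" matches router_model_names and goes through the router.
response = llm_router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
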
@@ -579,36 +579,34 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_openai_with_more_optional_params()
-# def test_completion_openai_azure_with_functions():
-#     function1 = [
-#         {
-#             "name": "get_current_weather",
-#             "description": "Get the current weather in a given location",
-#             "parameters": {
-#                 "type": "object",
-#                 "properties": {
-#                     "location": {
-#                         "type": "string",
-#                         "description": "The city and state, e.g. San Francisco, CA",
-#                     },
-#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-#                 },
-#                 "required": ["location"],
-#             },
-#         }
-#     ]
-#     try:
-#         response = completion(
-#             model="azure/chatgpt-functioncalling", messages=messages, stream=True
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#         for chunk in response:
-#             print(chunk)
-#             print(chunk["choices"][0]["finish_reason"])
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_azure_with_functions()
+def test_completion_openai_azure_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
+        response = completion(
+            model="azure/chatgpt-functioncalling", messages=messages, functions=function1
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_openai_azure_with_functions()
 
 
 def test_completion_azure():

@@ -2896,7 +2896,7 @@ def convert_to_model_response_object(response_object: Optional[dict]=None, model
             raise Exception("Error in response object format")
         choice_list=[]
         for idx, choice in enumerate(response_object["choices"]):
-            message = Message(content=choice["message"]["content"], role=choice["message"]["role"], function_call=choice["message"].get("function_call", None))
+            message = Message(content=choice["message"].get("content", None), role=choice["message"]["role"], function_call=choice["message"].get("function_call", None))
             finish_reason = choice.get("finish_reason", None)
             if finish_reason == None:
                 # gpt-4 vision can return 'finish_reason' or 'finish_details'

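The change above matters because a function-calling response may omit "content" entirely (or set it to null), so indexing choice["message"]["content"] can raise a KeyError. A minimal illustration with a hand-written response dict in the OpenAI chat format; the values are made up:

response_object = {
    "choices": [
        {
            "index": 0,
            "finish_reason": "function_call",
            "message": {
                "role": "assistant",
                # note: no "content" key at all in this example
                "function_call": {
                    "name": "get_current_weather",
                    "arguments": "{\"location\": \"Boston, MA\"}",
                },
            },
        }
    ],
    "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
}

# Old lookup: response_object["choices"][0]["message"]["content"] -> KeyError
# New lookup: .get("content", None) yields None for function-call messages.
content = response_object["choices"][0]["message"].get("content", None)
assert content is None
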
@@ -4018,7 +4018,8 @@ def exception_type(
                 raise APIConnectionError(
                     message=f"VLLMException - {original_exception.message}",
                     llm_provider="vllm",
-                    model=model
+                    model=model,
+                    request=original_exception.request
                 )
         elif custom_llm_provider == "azure":
             if "This model's maximum context length is" in error_str:

@@ -4093,7 +4094,8 @@ def exception_type(
         raise APIConnectionError(
             message=f"{str(original_exception)}",
             llm_provider=custom_llm_provider,
-            model=model
+            model=model,
+            request=original_exception.request
         )
     except Exception as e:
         # LOGGING