forked from phoenix/litellm-mirror
fix(proxy_server.py): return original model response via response headers - /v1/completions
to help devs with debugging
parent f17dd68df3
commit 15e0099948

4 changed files with 44 additions and 8 deletions

@@ -1153,7 +1153,9 @@ class OpenAITextCompletion(BaseLLM):
                 },
             )
 
             ## RESPONSE OBJECT
-            return TextCompletionResponse(**response_json)
+            response_obj = TextCompletionResponse(**response_json)
+            response_obj._hidden_params.original_response = json.dumps(response_json)
+            return response_obj
         except Exception as e:
             raise e
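
With this hunk, the raw provider JSON is no longer discarded after parsing: it is serialized with json.dumps and stashed on the response's _hidden_params. A minimal sketch of reading it back from the SDK (the model name and prompt are illustrative, not part of the commit):

    import litellm

    # Illustrative call; any /v1/completions-style model works here.
    response = litellm.text_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Say hello",
    )

    # original_response carries the provider payload as litellm received it,
    # before normalization into TextCompletionResponse.
    raw = response._hidden_params.get("original_response", None)
    if raw is not None:
        print(raw)
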
@@ -3126,14 +3126,21 @@ async def completion(
 
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
+            original_response = (
+                response._hidden_params.get("original_response", None) or ""
+            )
         else:
             model_id = ""
+            original_response = ""
 
         verbose_proxy_logger.debug("final response: %s", response)
         if (
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
-            custom_headers = {"x-litellm-model-id": model_id}
+            custom_headers = {
+                "x-litellm-model-id": model_id,
+                "x-litellm-original-response": original_response,
+            }
             selected_data_generator = select_data_generator(
                 response=response, user_api_key_dict=user_api_key_dict
             )

@@ -3145,6 +3152,7 @@ async def completion(
             )
 
         fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-original-response"] = original_response
         return response
     except Exception as e:
         verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
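
On the proxy side, the same value is mirrored into an x-litellm-original-response header on both streaming and non-streaming /v1/completions responses; both headers fall back to "" when _hidden_params is missing. A quick sketch of checking them from a client, assuming a proxy on localhost:4000 and a placeholder API key:

    import requests

    # URL, key, and model are assumptions for illustration only.
    resp = requests.post(
        "http://localhost:4000/v1/completions",
        headers={"Authorization": "Bearer sk-1234"},
        json={"model": "gpt-3.5-turbo-instruct", "prompt": "Say hello"},
    )

    # The proxy now surfaces the upstream payload alongside the deployment id.
    print(resp.headers.get("x-litellm-model-id"))
    print(resp.headers.get("x-litellm-original-response"))
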
@@ -4023,7 +4023,7 @@ def test_async_text_completion_stream():
     asyncio.run(test_get_response())
 
 
-test_async_text_completion_stream()
+# test_async_text_completion_stream()
 
 
 @pytest.mark.asyncio

@@ -227,6 +227,33 @@ class ChatCompletionDeltaToolCall(OpenAIObject):
     index: int
 
 
+class HiddenParams(OpenAIObject):
+    original_response: Optional[str] = None
+    model_id: Optional[str] = None  # used in Router for individual deployments
+
+    class Config:
+        extra = "allow"
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):
+        try:
+            return self.model_dump()  # noqa
+        except:
+            # if using pydantic v1
+            return self.dict()
+
+
 class ChatCompletionMessageToolCall(OpenAIObject):
     def __init__(
         self,
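
HiddenParams replaces the bare dict that _hidden_params used to be, and the dunder overrides above exist so existing dict-style call sites keep working. A short sketch of the resulting behavior:

    params = HiddenParams()

    # Attribute-style and dict-style access hit the same fields.
    params.model_id = "deployment-1"
    params["original_response"] = '{"id": "cmpl-123"}'

    # .get() mirrors dict semantics, including the default for missing keys.
    assert params.get("model_id") == "deployment-1"
    assert params.get("not_set", "fallback") == "fallback"

    # json() prefers pydantic v2's model_dump() and falls back to v1's dict().
    print(params.json())
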
@@ -729,7 +756,7 @@ class TextCompletionResponse(OpenAIObject):
     choices: List[TextChoices]
     usage: Optional[Usage]
     _response_ms: Optional[int] = None
-    _hidden_params: Optional[dict] = None
+    _hidden_params: HiddenParams
 
     def __init__(
         self,

@@ -792,9 +819,7 @@ class TextCompletionResponse(OpenAIObject):
             self._response_ms = response_ms
         else:
             self._response_ms = None
-        self._hidden_params = (
-            {}
-        )  # used in case users want to access the original model response
+        self._hidden_params = HiddenParams()
 
     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
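
Because __init__ now seeds _hidden_params with an empty HiddenParams() rather than {}, code that probes it before a backend populates anything behaves as it did with a dict. A minimal sketch, assuming the constructor's arguments all default as in the class:

    resp = TextCompletionResponse()

    # Reads return the default instead of raising, exactly as dict.get did.
    assert resp._hidden_params.get("original_response", "") == ""

    # Dict-style writes still work via HiddenParams.__setitem__.
    resp._hidden_params["model_id"] = "deployment-1"
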
@@ -1179,7 +1204,8 @@ class Logging:
 
         # User Logging -> if you pass in a custom logging function
         print_verbose(
-            f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
+            f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n",
+            log_level="INFO",
         )
         if self.logger_fn and callable(self.logger_fn):
             try: