diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index f5293ece9..93f4c2d95 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -1153,7 +1153,9 @@ class OpenAITextCompletion(BaseLLM):
                 },
             )
             ## RESPONSE OBJECT
-            return TextCompletionResponse(**response_json)
+            response_obj = TextCompletionResponse(**response_json)
+            response_obj._hidden_params.original_response = json.dumps(response_json)
+            return response_obj
         except Exception as e:
             raise e
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 1cf22c9cd..5ef3f454f 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3126,14 +3126,21 @@ async def completion(
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
+            original_response = (
+                response._hidden_params.get("original_response", None) or ""
+            )
         else:
             model_id = ""
+            original_response = ""

         verbose_proxy_logger.debug("final response: %s", response)

         if (
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
-            custom_headers = {"x-litellm-model-id": model_id}
+            custom_headers = {
+                "x-litellm-model-id": model_id,
+                "x-litellm-original-response": original_response,
+            }
             selected_data_generator = select_data_generator(
                 response=response, user_api_key_dict=user_api_key_dict
             )
@@ -3145,6 +3152,7 @@ async def completion(
         )

         fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-original-response"] = original_response
         return response
     except Exception as e:
         verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 9987110e2..84e3bb5de 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -4023,7 +4023,7 @@ def test_async_text_completion_stream():
     asyncio.run(test_get_response())


-test_async_text_completion_stream()
+# test_async_text_completion_stream()


 @pytest.mark.asyncio
diff --git a/litellm/utils.py b/litellm/utils.py
index 52660cec3..e92f2068a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -227,6 +227,33 @@ class ChatCompletionDeltaToolCall(OpenAIObject):
     index: int


+class HiddenParams(OpenAIObject):
+    original_response: Optional[str] = None
+    model_id: Optional[str] = None  # used in Router for individual deployments
+
+    class Config:
+        extra = "allow"
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):
+        try:
+            return self.model_dump()  # noqa
+        except:
+            # if using pydantic v1
+            return self.dict()
+
+
 class ChatCompletionMessageToolCall(OpenAIObject):
     def __init__(
         self,
@@ -729,7 +756,7 @@ class TextCompletionResponse(OpenAIObject):
     choices: List[TextChoices]
     usage: Optional[Usage]
     _response_ms: Optional[int] = None
-    _hidden_params: Optional[dict] = None
+    _hidden_params: HiddenParams

     def __init__(
         self,
@@ -792,9 +819,7 @@ class TextCompletionResponse(OpenAIObject):
             self._response_ms = response_ms
         else:
             self._response_ms = None
-        self._hidden_params = (
-            {}
-        )  # used in case users want to access the original model response
+        self._hidden_params = HiddenParams()

     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
@@ -1179,7 +1204,8 @@ class Logging:
                 # User Logging -> if you pass in a custom logging function
                 print_verbose(
-                    f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
+                    f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n",
+                    log_level="INFO",
                 )
                 if self.logger_fn and callable(self.logger_fn):
                     try:
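The sketch below is not part of the patch; it illustrates how the new surface might be consumed once the change lands. It assumes the OpenAI text-completion route, an `OPENAI_API_KEY` already exported, and (for the proxy path) a locally running LiteLLM proxy; the proxy URL, port, and the `sk-1234` bearer token are placeholders.

```python
# Illustrative usage sketch (assumptions: model name, proxy URL/port, and keys are placeholders).
import json

import litellm
import requests

# 1) SDK path: with this patch, TextCompletionResponse carries the raw provider
#    JSON in _hidden_params.original_response (set in the openai.py hunk above).
response = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say hello",
)
raw = response._hidden_params.get("original_response")
if raw:
    # Inspect the untouched provider payload, e.g. its id field.
    print(json.loads(raw)["id"])

# 2) Proxy path: the same payload is echoed back as a response header
#    (x-litellm-original-response), assuming a proxy listening on localhost:4000.
proxy_resp = requests.post(
    "http://localhost:4000/completions",
    json={"model": "gpt-3.5-turbo-instruct", "prompt": "Say hello"},
    headers={"Authorization": "Bearer sk-1234"},  # hypothetical proxy key
)
print(proxy_resp.headers.get("x-litellm-original-response"))
```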