diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 6c91bd15e..ec5c55855 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -960,7 +960,7 @@ class OpenAIChatCompletion(BaseLLM):
                 return convert_to_model_response_object(
                     response_object=stringified_response,
                     model_response_object=model_response,
-                    response_headers=headers,
+                    _response_headers=headers,
                 )
             except Exception as e:
                 if print_verbose is not None:
@@ -1059,7 +1059,7 @@ class OpenAIChatCompletion(BaseLLM):
                 response_object=stringified_response,
                 model_response_object=model_response,
                 hidden_params={"headers": headers},
-                response_headers=headers,
+                _response_headers=headers,
             )
         except Exception as e:
             raise e
@@ -1110,7 +1110,7 @@ class OpenAIChatCompletion(BaseLLM):
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
                 stream_options=data.get("stream_options", None),
-                response_headers=headers,
+                _response_headers=headers,
             )
 
             return streamwrapper
@@ -1160,7 +1160,7 @@ class OpenAIChatCompletion(BaseLLM):
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
                 stream_options=data.get("stream_options", None),
-                response_headers=headers,
+                _response_headers=headers,
             )
             return streamwrapper
         except (
@@ -1269,7 +1269,7 @@ class OpenAIChatCompletion(BaseLLM):
                 response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="embedding",
-                response_headers=headers,
+                _response_headers=headers,
             )  # type: ignore
         except Exception as e:
             ## LOGGING
@@ -1348,7 +1348,7 @@ class OpenAIChatCompletion(BaseLLM):
             return convert_to_model_response_object(
                 response_object=sync_embedding_response.model_dump(),
                 model_response_object=model_response,
-                response_headers=headers,
+                _response_headers=headers,
                 response_type="embedding",
             )  # type: ignore
         except OpenAIError as e:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index baad4f3bf..8af8e51b2 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1331,9 +1331,9 @@ def test_completion_openai_response_headers():
 
     print(f"response: {response}")
 
-    print("response_headers=", response.response_headers)
-    assert response.response_headers is not None
-    assert "x-ratelimit-remaining-tokens" in response.response_headers
+    print("response_headers=", response._response_headers)
+    assert response._response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response._response_headers
 
     # /chat/completion with streaming
 
@@ -1342,7 +1342,7 @@ def test_completion_openai_response_headers():
         messages=messages,
         stream=True,
     )
-    response_headers = streaming_response.response_headers
+    response_headers = streaming_response._response_headers
     print("streaming response_headers=", response_headers)
     assert response_headers is not None
     assert "x-ratelimit-remaining-tokens" in response_headers
@@ -1356,7 +1356,7 @@ def test_completion_openai_response_headers():
         input="hello",
     )
 
-    embedding_response_headers = embedding_response.response_headers
+    embedding_response_headers = embedding_response._response_headers
     print("embedding_response_headers=", embedding_response_headers)
     assert embedding_response_headers is not None
     assert "x-ratelimit-remaining-tokens" in embedding_response_headers
@@ -1386,9 +1386,9 @@ async def test_async_completion_openai_response_headers():
 
     print(f"response: {response}")
 
-    print("response_headers=", response.response_headers)
-    assert response.response_headers is not None
-    assert "x-ratelimit-remaining-tokens" in response.response_headers
+    print("response_headers=", response._response_headers)
+    assert response._response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response._response_headers
 
     # /chat/completion with streaming
 
@@ -1397,7 +1397,7 @@ async def test_async_completion_openai_response_headers():
         messages=messages,
         stream=True,
     )
-    response_headers = streaming_response.response_headers
+    response_headers = streaming_response._response_headers
     print("streaming response_headers=", response_headers)
     assert response_headers is not None
     assert "x-ratelimit-remaining-tokens" in response_headers
@@ -1411,7 +1411,7 @@ async def test_async_completion_openai_response_headers():
         input="hello",
    )
 
-    embedding_response_headers = embedding_response.response_headers
+    embedding_response_headers = embedding_response._response_headers
     print("embedding_response_headers=", embedding_response_headers)
     assert embedding_response_headers is not None
     assert "x-ratelimit-remaining-tokens" in embedding_response_headers
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 55e5335f3..6581fea5f 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -536,7 +536,7 @@ class ModelResponse(OpenAIObject):
 
     _hidden_params: dict = {}
 
-    response_headers: Optional[dict] = None
+    _response_headers: Optional[dict] = None
 
     def __init__(
         self,
@@ -551,7 +551,7 @@ class ModelResponse(OpenAIObject):
         stream_options=None,
         response_ms=None,
         hidden_params=None,
-        response_headers=None,
+        _response_headers=None,
         **params,
     ) -> None:
         if stream is not None and stream is True:
@@ -601,8 +601,8 @@ class ModelResponse(OpenAIObject):
         if hidden_params:
             self._hidden_params = hidden_params
 
-        if response_headers:
-            self.response_headers = response_headers
+        if _response_headers:
+            self._response_headers = _response_headers
 
         init_values = {
             "id": id,
@@ -673,8 +673,7 @@ class EmbeddingResponse(OpenAIObject):
     """Usage statistics for the embedding request."""
 
     _hidden_params: dict = {}
-
-    response_headers: Optional[dict] = None
+    _response_headers: Optional[Dict] = None
 
     def __init__(
         self,
@@ -684,7 +683,7 @@ class EmbeddingResponse(OpenAIObject):
         response_ms=None,
         data=None,
         hidden_params=None,
-        response_headers=None,
+        _response_headers=None,
         **params,
     ):
         object = "list"
@@ -702,6 +701,9 @@ class EmbeddingResponse(OpenAIObject):
         else:
             usage = Usage()
 
+        if _response_headers:
+            self._response_headers = _response_headers
+
         model = model
 
         super().__init__(model=model, object=object, data=data, usage=usage)
@@ -984,7 +986,7 @@ class TranscriptionResponse(OpenAIObject):
     text: Optional[str] = None
 
     _hidden_params: dict = {}
-    response_headers: Optional[dict] = None
+    _response_headers: Optional[dict] = None
 
     def __init__(self, text=None):
         super().__init__(text=text)
diff --git a/litellm/utils.py b/litellm/utils.py
index bb173c133..ef4daec5e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5666,7 +5666,7 @@ def convert_to_model_response_object(
     start_time=None,
     end_time=None,
     hidden_params: Optional[dict] = None,
-    response_headers: Optional[dict] = None,
+    _response_headers: Optional[dict] = None,
 ):
     received_args = locals()
     ### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
@@ -5765,8 +5765,8 @@ def convert_to_model_response_object(
             if hidden_params is not None:
                 model_response_object._hidden_params = hidden_params
 
-            if response_headers is not None:
-                model_response_object.response_headers = response_headers
+            if _response_headers is not None:
+                model_response_object._response_headers = _response_headers
 
             return model_response_object
         elif response_type == "embedding" and (
@@ -5800,8 +5800,8 @@ def convert_to_model_response_object(
             if hidden_params is not None:
                 model_response_object._hidden_params = hidden_params
 
-            if response_headers is not None:
-                model_response_object.response_headers = response_headers
+            if _response_headers is not None:
+                model_response_object._response_headers = _response_headers
 
             return model_response_object
         elif response_type == "image_generation" and (
@@ -5845,8 +5845,8 @@ def convert_to_model_response_object(
             if hidden_params is not None:
                 model_response_object._hidden_params = hidden_params
 
-            if response_headers is not None:
-                model_response_object.response_headers = response_headers
+            if _response_headers is not None:
+                model_response_object._response_headers = _response_headers
 
             return model_response_object
     except Exception as e:
@@ -8273,7 +8273,7 @@ class CustomStreamWrapper:
         logging_obj=None,
         stream_options=None,
         make_call: Optional[Callable] = None,
-        response_headers: Optional[dict] = None,
+        _response_headers: Optional[dict] = None,
     ):
         self.model = model
         self.make_call = make_call
@@ -8305,7 +8305,7 @@ class CustomStreamWrapper:
         self._hidden_params = {
             "model_id": (_model_info.get("id", None))
         }  # returned as x-litellm-model-id response header in proxy
-        self.response_headers = response_headers
+        self._response_headers = _response_headers
         self.response_id = None
         self.logging_loop = None
         self.rules = Rules()
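
For reference, a minimal sketch of how the renamed attribute is read after this change, mirroring the updated tests above. The model name and message content are illustrative only, and an OPENAI_API_KEY is assumed to be configured in the environment:

    import litellm

    response = litellm.completion(
        model="gpt-3.5-turbo",  # illustrative model name
        messages=[{"role": "user", "content": "hi"}],
    )

    # Raw provider HTTP response headers now live on the private
    # `_response_headers` attribute (previously `response_headers`).
    headers = response._response_headers
    if headers is not None:
        print(headers.get("x-ratelimit-remaining-tokens"))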