fix(proxy_server.py): return original model response via response headers - /v1/completions

To help developers with debugging.
This commit is contained in:
Krrish Dholakia 2024-04-03 13:05:43 -07:00
parent f17dd68df3
commit 15e0099948
4 changed files with 44 additions and 8 deletions

View file

@@ -3126,14 +3126,21 @@ async def completion(
if hasattr(response, "_hidden_params"):
model_id = response._hidden_params.get("model_id", None) or ""
original_response = (
response._hidden_params.get("original_response", None) or ""
)
else:
model_id = ""
original_response = ""
verbose_proxy_logger.debug("final response: %s", response)
if (
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
custom_headers = {"x-litellm-model-id": model_id}
custom_headers = {
"x-litellm-model-id": model_id,
"x-litellm-original-response": original_response,
}
selected_data_generator = select_data_generator(
response=response, user_api_key_dict=user_api_key_dict
)
@@ -3145,6 +3152,7 @@ async def completion(
)
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-original-response"] = original_response
return response
except Exception as e:
verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")