From e620d2f21972d5199726ec7b0e4475b5390c97bf Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 23 Dec 2023 12:14:09 +0530
Subject: [PATCH] fix(utils.py): log user_id to langfuse

---
 litellm/integrations/langfuse.py |  5 ++-
 litellm/router.py                | 16 +++------
 litellm/tests/langfuse.log       | 59 ++++++++++++++++++++++++++++++--
 litellm/tests/test_langfuse.py   |  8 ++---
 litellm/utils.py                 |  1 +
 5 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index d0c2b3a715..92a97c6c5a 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -39,6 +39,7 @@ class LangFuseLogger:
         # Method definition
 
         try:
+            print(f"result in langfuse logging: {response_obj}")
             print_verbose(
                 f"Langfuse Logging - Enters logging function for model {kwargs}"
             )
@@ -64,7 +65,7 @@ class LangFuseLogger:
             # end of processing langfuse ########################
             input = prompt
             output = response_obj["choices"][0]["message"].json()
-
+            print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}")
             self._log_langfuse_v2(
                 user_id,
                 metadata,
@@ -135,6 +136,7 @@ class LangFuseLogger:
                 )
             )
 
+        print(f"LANGFUSE OUTPUT: {output}")
         trace.generation(
             CreateGeneration(
                 name=metadata.get("generation_name", "litellm-completion"),
@@ -171,6 +173,7 @@ class LangFuseLogger:
             user_id=user_id,
         )
 
+
        trace.generation(
             name=metadata.get("generation_name", "litellm-completion"),
             startTime=start_time,
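The hunks above add temporary debug prints around the hand-off to Langfuse. For orientation, the sketch below is a hypothetical stand-in showing which fields from the completion kwargs and the response object end up on the Langfuse trace and generation; it is illustrative only, not the integration's actual helper, and field names such as endTime and the metadata lookup are assumptions.

    # Hypothetical illustration of the payload shape the integration assembles;
    # not litellm's actual code.
    def build_langfuse_payload(kwargs, response_obj, user_id, start_time, end_time):
        metadata = kwargs.get("metadata") or {}  # assumption: metadata passed through completion kwargs
        return {
            "trace": {
                "name": metadata.get("generation_name", "litellm-completion"),
                "user_id": user_id,  # the value this patch starts forwarding
            },
            "generation": {
                "name": metadata.get("generation_name", "litellm-completion"),
                "startTime": start_time,
                "endTime": end_time,  # assumed field, mirroring startTime in the hunk above
                "input": kwargs.get("messages"),
                "output": response_obj["choices"][0]["message"],  # assistant message, as in the hunk above
            },
        }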
diff --git a/litellm/router.py b/litellm/router.py
index 6252765217..c176ab60a9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -710,6 +710,7 @@ class Router:
         Function LiteLLM submits a callback to after a successful completion. Purpose of this is to update TPM/RPM usage per model
         """
+        deployment_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", None)
         model_name = kwargs.get('model', None) # i.e. gpt35turbo
         custom_llm_provider = kwargs.get("litellm_params", {}).get('custom_llm_provider', None) # i.e. azure
         if custom_llm_provider:
@@ -717,10 +718,10 @@ class Router:
             if kwargs["stream"] is True:
                 if kwargs.get("complete_streaming_response"):
                     total_tokens = kwargs.get("complete_streaming_response")['usage']['total_tokens']
-                    self._set_deployment_usage(model_name, total_tokens)
+                    self._set_deployment_usage(deployment_id, total_tokens)
         else:
             total_tokens = completion_response['usage']['total_tokens']
-            self._set_deployment_usage(model_name, total_tokens)
+            self._set_deployment_usage(deployment_id, total_tokens)
 
         self.deployment_latency_map[model_name] = (end_time - start_time).total_seconds()
@@ -867,15 +868,8 @@ class Router:
 
         # return deployment with lowest tpm usage
         for item in potential_deployments:
-            deployment_name=item["litellm_params"]["model"]
-            custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
-            if custom_llm_provider is not None:
-                deployment_name = f"{custom_llm_provider}/{deployment_name}"
-            else:
-                litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
-                if litellm_provider is not None:
-                    deployment_name = f"{litellm_provider}/{deployment_name}"
-            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
+            model_id = item["model_info"].get("id")
+            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=model_id)
 
             if item_tpm == 0:
                 return item
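The router hunks above switch TPM/RPM bookkeeping from the provider/model string to the deployment's `model_info` id, so two deployments that serve the same model name no longer pool their usage counters. Below is a minimal, self-contained sketch of that idea with an in-memory store and made-up deployment ids; litellm's actual implementation goes through `_set_deployment_usage`/`_get_deployment_usage` and its cache.

    from collections import defaultdict

    # Illustrative only: usage keyed by a unique deployment id rather than by model name,
    # so identical model names on different deployments do not collide.
    class DeploymentUsageTracker:
        def __init__(self) -> None:
            self._tpm = defaultdict(int)  # deployment_id -> tokens used this minute
            self._rpm = defaultdict(int)  # deployment_id -> requests made this minute

        def record(self, deployment_id: str, total_tokens: int) -> None:
            self._tpm[deployment_id] += total_tokens
            self._rpm[deployment_id] += 1

        def usage(self, deployment_id: str) -> tuple[int, int]:
            return self._tpm[deployment_id], self._rpm[deployment_id]

    tracker = DeploymentUsageTracker()
    tracker.record("azure-gpt-35-eu", 812)   # same model, different deployments...
    tracker.record("azure-gpt-35-us", 415)   # ...tracked independently by id
    print(tracker.usage("azure-gpt-35-eu"))  # -> (812, 1)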
diff --git a/litellm/tests/langfuse.log b/litellm/tests/langfuse.log
index 58c1c8fb22..bc09c5ba2a 100644
--- a/litellm/tests/langfuse.log
+++ b/litellm/tests/langfuse.log
@@ -1,2 +1,57 @@
-close.started
-close.complete
+Starting new HTTPS connection (1): api.anthropic.com:443
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+https://api.anthropic.com:443 "POST /v1/complete HTTP/1.1" 200 None
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'this is a streaming test for llama2 + langfuse'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 20, 'stream': True, 'temperature': 0.2}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:00 GMT'), (b'Content-Type', b'text/event-stream'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-0613'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'62'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999967'), (b'x-ratelimit-remaining-tokens_usage_based', b'999967'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'dd1029a85edecb986fb662945c9f7b4f'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=dnuSnc6BPNJd4lgWKpv3iE2P5zy4r5aCVekXVi7HG7U-1703313180-1-AbeMpAfvmJ6BShULb7tMaErR5ergUrt6ohiXj1e8zoo9AotZ0Jz0alUSUcp8FXyQX2VQ9P6gBUeoSR9aE98OasU=; path=/; expires=Sat, 23-Dec-23 07:03:00 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=dET0GKSNfbtSWNJuXndP8GY8M0ANzDK4Dl7mvIfhmM0-1703313180257-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e920e4f47f4b0-BOM'), (b'alt-svc', b'h3=":443"; ma=86400')])
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris?"}], 'model': 'gpt-3.5-turbo-1106', 'tool_choice': 'auto', 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}]}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-1106'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'2145'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999968'), (b'x-ratelimit-remaining-tokens_usage_based', b'999968'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'd0fd54d3a7696ee677f3690e9e0d6d04'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=P_4fUmw4vvrbGKTlavf9VWuuzzro87gvhLE0DEGKA84-1703313183-1-ARgz+AQXAzH1uTTK8iyPE3QnT8TovAP61UvYsFD+d5DWM0lFi5U2+eSgPH+Pqt+Y1fNH1FWBUn9DmVceJKvyLcU=; path=/; expires=Sat, 23-Dec-23 07:03:03 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=g.nvBthte.6BJ7KHg5tihyGwupeGfMNMGnw72QUUBQc-1703313183034-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e92128b7ff2e2-BOM'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
+nction': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}]}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-1106'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'2145'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999968'), (b'x-ratelimit-remaining-tokens_usage_based', b'999968'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'd0fd54d3a7696ee677f3690e9e0d6d04'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=P_4fUmw4vvrbGKTlavf9VWuuzzro87gvhLE0DEGKA84-1703313183-1-ARgz+AQXAzH1uTTK8iyPE3QnT8TovAP61UvYsFD+d5DWM0lFi5U2+eSgPH+Pqt+Y1fNH1FWBUn9DmVceJKvyLcU=; path=/; expires=Sat, 23-Dec-23 07:03:03 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=g.nvBthte.6BJ7KHg5tihyGwupeGfMNMGnw72QUUBQc-1703313183034-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e92128b7ff2e2-BOM'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
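The tool-calling traffic captured in the log above corresponds to the request that test_langfuse_logging_tool_calling issues. The test body itself is not part of this patch, so the snippet below is only a rough reconstruction from the logged json_data; the tools schema, model, and prompt are taken verbatim from the log, and the final line mirrors the hunk in test_langfuse.py below.

    import litellm

    tools = [{
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }]

    response = litellm.completion(
        model="gpt-3.5-turbo-1106",
        messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}],
        tools=tools,
        tool_choice="auto",
    )
    tool_calls = response.choices[0].message.tool_calls  # as checked in the test below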
diff --git a/litellm/tests/test_langfuse.py b/litellm/tests/test_langfuse.py
index f77d2acdce..033a3df4f9 100644
--- a/litellm/tests/test_langfuse.py
+++ b/litellm/tests/test_langfuse.py
@@ -120,7 +120,7 @@ def test_langfuse_logging_async():
         pytest.fail(f"An exception occurred - {e}")
 
 
-test_langfuse_logging_async()
+# test_langfuse_logging_async()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -145,7 +145,7 @@ def test_langfuse_logging():
         pytest.fail(f"An exception occurred - {e}")
 
 
-test_langfuse_logging()
+# test_langfuse_logging()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -174,7 +174,7 @@ def test_langfuse_logging_stream():
         print(e)
 
 
-test_langfuse_logging_stream()
+# test_langfuse_logging_stream()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -297,4 +297,4 @@ def test_langfuse_logging_tool_calling():
 
     tool_calls = response.choices[0].message.tool_calls
 
-# test_langfuse_logging_tool_calling()
+test_langfuse_logging_tool_calling()
diff --git a/litellm/utils.py b/litellm/utils.py
index 41f6a21c5c..767bc0b07f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1052,6 +1052,7 @@ class Logging:
                         response_obj=result,
                         start_time=start_time,
                         end_time=end_time,
+                        user_id=kwargs.get("user", None),
                         print_verbose=print_verbose,
                     )
                 if callback == "cache" and litellm.cache is not None:
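With the utils.py hunk above, the success handler forwards kwargs.get("user", None) into the Langfuse callback as user_id, so the OpenAI-style user field on a completion call should surface as the trace's user id. A minimal, illustrative usage sketch follows; the metadata key shown is optional and assumed from the integration code earlier in this patch.

    import litellm

    litellm.success_callback = ["langfuse"]  # send successful completions to the Langfuse logger

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi there"}],
        user="user-1234",  # forwarded to Langfuse as user_id by this patch
        metadata={"generation_name": "litellm-completion"},  # optional; read by the integration above
    )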