From e620d2f21972d5199726ec7b0e4475b5390c97bf Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 23 Dec 2023 12:14:09 +0530
Subject: [PATCH] fix(utils.py): log user_id to langfuse

---
 litellm/integrations/langfuse.py |  5 ++-
 litellm/router.py                | 16 +++------
 litellm/tests/langfuse.log       | 59 ++++++++++++++++++++++++++++++--
 litellm/tests/test_langfuse.py   |  8 ++---
 litellm/utils.py                 |  1 +
 5 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index d0c2b3a715..92a97c6c5a 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -39,6 +39,7 @@ class LangFuseLogger:
         # Method definition
 
         try:
+            print(f"result in langfuse logging: {response_obj}")
             print_verbose(
                 f"Langfuse Logging - Enters logging function for model {kwargs}"
             )
@@ -64,7 +65,7 @@ class LangFuseLogger:
             # end of processing langfuse ########################
             input = prompt
             output = response_obj["choices"][0]["message"].json()
-
+            print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}")
             self._log_langfuse_v2(
                 user_id,
                 metadata,
@@ -135,6 +136,7 @@ class LangFuseLogger:
                 )
             )
 
+        print(f"LANGFUSE OUTPUT: {output}")
         trace.generation(
             CreateGeneration(
                 name=metadata.get("generation_name", "litellm-completion"),
@@ -171,6 +173,7 @@ class LangFuseLogger:
             user_id=user_id,
         )
 
+
        trace.generation(
             name=metadata.get("generation_name", "litellm-completion"),
             startTime=start_time,
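The hunks above add temporary debug prints around the hand-off to Langfuse. For orientation, the sketch below is a hypothetical stand-in showing which fields from the completion kwargs and the response object end up on the Langfuse trace and generation; it is illustrative only, not the integration's actual helper, and field names such as endTime and the metadata lookup are assumptions.

    # Hypothetical illustration of the payload shape the integration assembles;
    # not litellm's actual code.
    def build_langfuse_payload(kwargs, response_obj, user_id, start_time, end_time):
        metadata = kwargs.get("metadata") or {}  # assumption: metadata passed through completion kwargs
        return {
            "trace": {
                "name": metadata.get("generation_name", "litellm-completion"),
                "user_id": user_id,  # the value this patch starts forwarding
            },
            "generation": {
                "name": metadata.get("generation_name", "litellm-completion"),
                "startTime": start_time,
                "endTime": end_time,  # assumed field, mirroring startTime in the hunk above
                "input": kwargs.get("messages"),
                "output": response_obj["choices"][0]["message"],  # assistant message, as in the hunk above
            },
        }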
diff --git a/litellm/router.py b/litellm/router.py
index 6252765217..c176ab60a9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -710,6 +710,7 @@ class Router:
         Function LiteLLM submits a callback to after a successful completion. Purpose of this is to update TPM/RPM usage per model
         """
+        deployment_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", None)
         model_name = kwargs.get('model', None) # i.e. gpt35turbo
         custom_llm_provider = kwargs.get("litellm_params", {}).get('custom_llm_provider', None) # i.e. azure
         if custom_llm_provider:
@@ -717,10 +718,10 @@ class Router:
             if kwargs["stream"] is True:
                 if kwargs.get("complete_streaming_response"):
                     total_tokens = kwargs.get("complete_streaming_response")['usage']['total_tokens']
-                    self._set_deployment_usage(model_name, total_tokens)
+                    self._set_deployment_usage(deployment_id, total_tokens)
         else:
             total_tokens = completion_response['usage']['total_tokens']
-            self._set_deployment_usage(model_name, total_tokens)
+            self._set_deployment_usage(deployment_id, total_tokens)
 
         self.deployment_latency_map[model_name] = (end_time - start_time).total_seconds()
@@ -867,15 +868,8 @@ class Router:
 
         # return deployment with lowest tpm usage
         for item in potential_deployments:
-            deployment_name=item["litellm_params"]["model"]
-            custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
-            if custom_llm_provider is not None:
-                deployment_name = f"{custom_llm_provider}/{deployment_name}"
-            else:
-                litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
-                if litellm_provider is not None:
-                    deployment_name = f"{litellm_provider}/{deployment_name}"
-            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
+            model_id = item["model_info"].get("id")
+            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=model_id)
 
             if item_tpm == 0:
                 return item
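The router hunks above switch TPM/RPM bookkeeping from the provider/model string to the deployment's `model_info` id, so two deployments that serve the same model name no longer pool their usage counters. Below is a minimal, self-contained sketch of that idea with an in-memory store and made-up deployment ids; litellm's actual implementation goes through `_set_deployment_usage`/`_get_deployment_usage` and its cache.

    from collections import defaultdict

    # Illustrative only: usage keyed by a unique deployment id rather than by model name,
    # so identical model names on different deployments do not collide.
    class DeploymentUsageTracker:
        def __init__(self) -> None:
            self._tpm = defaultdict(int)  # deployment_id -> tokens used this minute
            self._rpm = defaultdict(int)  # deployment_id -> requests made this minute

        def record(self, deployment_id: str, total_tokens: int) -> None:
            self._tpm[deployment_id] += total_tokens
            self._rpm[deployment_id] += 1

        def usage(self, deployment_id: str) -> tuple[int, int]:
            return self._tpm[deployment_id], self._rpm[deployment_id]

    tracker = DeploymentUsageTracker()
    tracker.record("azure-gpt-35-eu", 812)   # same model, different deployments...
    tracker.record("azure-gpt-35-us", 415)   # ...tracked independently by id
    print(tracker.usage("azure-gpt-35-eu"))  # -> (812, 1)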
diff --git a/litellm/tests/langfuse.log b/litellm/tests/langfuse.log
index 58c1c8fb22..bc09c5ba2a 100644
--- a/litellm/tests/langfuse.log
+++ b/litellm/tests/langfuse.log
@@ -1,2 +1,57 @@
-close.started
-close.complete
+Starting new HTTPS connection (1): api.anthropic.com:443
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+https://api.anthropic.com:443 "POST /v1/complete HTTP/1.1" 200 None
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'this is a streaming test for llama2 + langfuse'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 20, 'stream': True, 'temperature': 0.2}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:00 GMT'), (b'Content-Type', b'text/event-stream'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-0613'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'62'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999967'), (b'x-ratelimit-remaining-tokens_usage_based', b'999967'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'dd1029a85edecb986fb662945c9f7b4f'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=dnuSnc6BPNJd4lgWKpv3iE2P5zy4r5aCVekXVi7HG7U-1703313180-1-AbeMpAfvmJ6BShULb7tMaErR5ergUrt6ohiXj1e8zoo9AotZ0Jz0alUSUcp8FXyQX2VQ9P6gBUeoSR9aE98OasU=; path=/; expires=Sat, 23-Dec-23 07:03:00 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=dET0GKSNfbtSWNJuXndP8GY8M0ANzDK4Dl7mvIfhmM0-1703313180257-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e920e4f47f4b0-BOM'), (b'alt-svc', b'h3=":443"; ma=86400')])
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+Starting new HTTPS connection (1): litellm-logging.onrender.com:443
+Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris?"}], 'model': 'gpt-3.5-turbo-1106', 'tool_choice': 'auto', 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}]}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-1106'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'2145'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999968'), (b'x-ratelimit-remaining-tokens_usage_based', b'999968'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'd0fd54d3a7696ee677f3690e9e0d6d04'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=P_4fUmw4vvrbGKTlavf9VWuuzzro87gvhLE0DEGKA84-1703313183-1-ARgz+AQXAzH1uTTK8iyPE3QnT8TovAP61UvYsFD+d5DWM0lFi5U2+eSgPH+Pqt+Y1fNH1FWBUn9DmVceJKvyLcU=; path=/; expires=Sat, 23-Dec-23 07:03:03 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=g.nvBthte.6BJ7KHg5tihyGwupeGfMNMGnw72QUUBQc-1703313183034-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e92128b7ff2e2-BOM'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
+nction': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}]}}
+connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
+connect_tcp.complete return_value=
+start_tls.started ssl_context= server_hostname='api.openai.com' timeout=600.0
+start_tls.complete return_value=
+send_request_headers.started request=
+send_request_headers.complete
+send_request_body.started request=
+send_request_body.complete
+receive_response_headers.started request=
+https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
+receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-1106'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'2145'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999968'), (b'x-ratelimit-remaining-tokens_usage_based', b'999968'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'd0fd54d3a7696ee677f3690e9e0d6d04'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=P_4fUmw4vvrbGKTlavf9VWuuzzro87gvhLE0DEGKA84-1703313183-1-ARgz+AQXAzH1uTTK8iyPE3QnT8TovAP61UvYsFD+d5DWM0lFi5U2+eSgPH+Pqt+Y1fNH1FWBUn9DmVceJKvyLcU=; path=/; expires=Sat, 23-Dec-23 07:03:03 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=g.nvBthte.6BJ7KHg5tihyGwupeGfMNMGnw72QUUBQc-1703313183034-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e92128b7ff2e2-BOM'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
+receive_response_body.started request=
+receive_response_body.complete
+response_closed.started
+response_closed.complete
+HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
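The tool-calling traffic captured in the log above corresponds to the request that test_langfuse_logging_tool_calling issues. The test body itself is not part of this patch, so the snippet below is only a rough reconstruction from the logged json_data; the tools schema, model, and prompt are taken verbatim from the log, and the final line mirrors the hunk in test_langfuse.py below.

    import litellm

    tools = [{
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }]

    response = litellm.completion(
        model="gpt-3.5-turbo-1106",
        messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}],
        tools=tools,
        tool_choice="auto",
    )
    tool_calls = response.choices[0].message.tool_calls  # as checked in the test below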
diff --git a/litellm/tests/test_langfuse.py b/litellm/tests/test_langfuse.py
index f77d2acdce..033a3df4f9 100644
--- a/litellm/tests/test_langfuse.py
+++ b/litellm/tests/test_langfuse.py
@@ -120,7 +120,7 @@ def test_langfuse_logging_async():
         pytest.fail(f"An exception occurred - {e}")
 
 
-test_langfuse_logging_async()
+# test_langfuse_logging_async()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -145,7 +145,7 @@ def test_langfuse_logging():
         pytest.fail(f"An exception occurred - {e}")
 
 
-test_langfuse_logging()
+# test_langfuse_logging()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -174,7 +174,7 @@ def test_langfuse_logging_stream():
         print(e)
 
 
-test_langfuse_logging_stream()
+# test_langfuse_logging_stream()
 
 
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -297,4 +297,4 @@ def test_langfuse_logging_tool_calling():
 
     tool_calls = response.choices[0].message.tool_calls
 
-# test_langfuse_logging_tool_calling()
+test_langfuse_logging_tool_calling()
diff --git a/litellm/utils.py b/litellm/utils.py
index 41f6a21c5c..767bc0b07f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1052,6 +1052,7 @@ class Logging:
                         response_obj=result,
                         start_time=start_time,
                         end_time=end_time,
+                        user_id=kwargs.get("user", None),
                         print_verbose=print_verbose,
                     )
                 if callback == "cache" and litellm.cache is not None:
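With the utils.py hunk above, the success handler forwards kwargs.get("user", None) into the Langfuse callback as user_id, so the OpenAI-style user field on a completion call should surface as the trace's user id. A minimal, illustrative usage sketch follows; the metadata key shown is optional and assumed from the integration code earlier in this patch.

    import litellm

    litellm.success_callback = ["langfuse"]  # send successful completions to the Langfuse logger

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi there"}],
        user="user-1234",  # forwarded to Langfuse as user_id by this patch
        metadata={"generation_name": "litellm-completion"},  # optional; read by the integration above
    )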