fix(utils.py): log user_id to langfuse

Krrish Dholakia 2023-12-23 12:14:09 +05:30
parent 2df5ce4b7c
commit e620d2f219
5 changed files with 71 additions and 18 deletions

View file

@@ -39,6 +39,7 @@ class LangFuseLogger:
# Method definition
try:
print(f"result in langfuse logging: {response_obj}")
print_verbose(
f"Langfuse Logging - Enters logging function for model {kwargs}"
)
@@ -64,7 +65,7 @@ class LangFuseLogger:
# end of processing langfuse ########################
input = prompt
output = response_obj["choices"][0]["message"].json()
print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}")
self._log_langfuse_v2(
user_id,
metadata,
@@ -135,6 +136,7 @@ class LangFuseLogger:
)
)
print(f"LANGFUSE OUTPUT: {output}")
trace.generation(
CreateGeneration(
name=metadata.get("generation_name", "litellm-completion"),
@@ -171,6 +173,7 @@ class LangFuseLogger:
user_id=user_id,
)
trace.generation(
name=metadata.get("generation_name", "litellm-completion"),
startTime=start_time,
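
Taken together, these hunks thread the caller's user_id into the Langfuse trace and log the raw message output alongside it. A minimal sketch of the v2-style flow, assuming the langfuse Python SDK is configured via environment variables; the standalone helper name and any keyword arguments not visible in the diff are assumptions, not LiteLLM's actual method:

from langfuse import Langfuse  # assumption: langfuse SDK installed

langfuse = Langfuse()  # reads LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY / LANGFUSE_HOST

def log_completion_v2(user_id, metadata, input, output, start_time, end_time, model):
    # Sketch of the flow above: the trace carries user_id so Langfuse can group
    # requests per end user; the generation records model, payloads, and timing.
    trace = langfuse.trace(
        name=metadata.get("generation_name", "litellm-completion"),
        input=input,
        output=output,
        user_id=user_id,  # newly threaded through by this commit
    )
    trace.generation(
        name=metadata.get("generation_name", "litellm-completion"),
        startTime=start_time,  # camelCase kwargs, as used in the hunk above
        endTime=end_time,
        model=model,
        input=input,
        output=output,
    )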

View file

@@ -710,6 +710,7 @@ class Router:
Function LiteLLM submits a callback to after a successful
completion. Purpose of this is to update TPM/RPM usage per model
"""
deployment_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", None)
model_name = kwargs.get('model', None) # i.e. gpt35turbo
custom_llm_provider = kwargs.get("litellm_params", {}).get('custom_llm_provider', None) # i.e. azure
if custom_llm_provider:
@@ -717,10 +718,10 @@ class Router:
if kwargs["stream"] is True:
if kwargs.get("complete_streaming_response"):
total_tokens = kwargs.get("complete_streaming_response")['usage']['total_tokens']
self._set_deployment_usage(model_name, total_tokens)
self._set_deployment_usage(deployment_id, total_tokens)
else:
total_tokens = completion_response['usage']['total_tokens']
self._set_deployment_usage(model_name, total_tokens)
self._set_deployment_usage(deployment_id, total_tokens)
self.deployment_latency_map[model_name] = (end_time - start_time).total_seconds()
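
In this hunk, TPM usage is recorded against the deployment's model_info id instead of the (possibly shared) model name, so two deployments serving the same underlying model stop sharing one counter. A self-contained sketch of the keying scheme; the dict below is only an illustrative stand-in for the Router's configured cache, and the helpers mirror but do not reproduce the real methods:

from collections import defaultdict
from datetime import datetime

usage_cache = defaultdict(int)  # illustrative stand-in for the Router's cache

def set_deployment_usage(deployment_id: str, total_tokens: int) -> None:
    # Bucket token usage per minute, keyed by deployment id (model_info["id"]).
    current_minute = datetime.now().strftime("%H-%M")
    usage_cache[f"{deployment_id}:tpm:{current_minute}"] += total_tokens

def get_deployment_usage(deployment_name: str):
    # Read TPM/RPM back under the same key.
    current_minute = datetime.now().strftime("%H-%M")
    tpm = usage_cache[f"{deployment_name}:tpm:{current_minute}"]
    rpm = usage_cache[f"{deployment_name}:rpm:{current_minute}"]
    return tpm, rpm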
@@ -867,15 +868,8 @@ class Router:
# return deployment with lowest tpm usage
for item in potential_deployments:
deployment_name=item["litellm_params"]["model"]
custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
if custom_llm_provider is not None:
deployment_name = f"{custom_llm_provider}/{deployment_name}"
else:
litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
if litellm_provider is not None:
deployment_name = f"{litellm_provider}/{deployment_name}"
item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
model_id = item["model_info"].get("id")
item_tpm, item_rpm = self._get_deployment_usage(deployment_name=model_id)
if item_tpm == 0:
return item
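
With usage written and read under the same deployment id, the picker no longer reconstructs a provider-prefixed model name before the lookup. A minimal sketch of the selection loop using the illustrative get_deployment_usage helper above; the lowest-TPM fallback after the early return is an assumption about code the hunk truncates:

def pick_deployment(potential_deployments):
    lowest_tpm = float("inf")
    best = None
    for item in potential_deployments:
        model_id = item["model_info"].get("id")  # same key used when recording usage
        item_tpm, item_rpm = get_deployment_usage(deployment_name=model_id)
        if item_tpm == 0:
            return item  # unused this minute: take it immediately, as in the diff
        if item_tpm < lowest_tpm:
            lowest_tpm, best = item_tpm, item
    return best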

View file

@@ -1,2 +1,57 @@
close.started
close.complete
Starting new HTTPS connection (1): api.anthropic.com:443
Starting new HTTPS connection (1): litellm-logging.onrender.com:443
https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
https://api.anthropic.com:443 "POST /v1/complete HTTP/1.1" 200 None
Starting new HTTPS connection (1): litellm-logging.onrender.com:443
Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'this is a streaming test for llama2 + langfuse'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 20, 'stream': True, 'temperature': 0.2}}
connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1090f92d0>
start_tls.started ssl_context=<ssl.SSLContext object at 0x108ddf020> server_hostname='api.openai.com' timeout=600.0
start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1090f9290>
send_request_headers.started request=<Request [b'POST']>
send_request_headers.complete
send_request_body.started request=<Request [b'POST']>
send_request_body.complete
receive_response_headers.started request=<Request [b'POST']>
https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:00 GMT'), (b'Content-Type', b'text/event-stream'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-0613'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'62'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999967'), (b'x-ratelimit-remaining-tokens_usage_based', b'999967'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'dd1029a85edecb986fb662945c9f7b4f'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=dnuSnc6BPNJd4lgWKpv3iE2P5zy4r5aCVekXVi7HG7U-1703313180-1-AbeMpAfvmJ6BShULb7tMaErR5ergUrt6ohiXj1e8zoo9AotZ0Jz0alUSUcp8FXyQX2VQ9P6gBUeoSR9aE98OasU=; path=/; expires=Sat, 23-Dec-23 07:03:00 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=dET0GKSNfbtSWNJuXndP8GY8M0ANzDK4Dl7mvIfhmM0-1703313180257-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e920e4f47f4b0-BOM'), (b'alt-svc', b'h3=":443"; ma=86400')])
HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"
receive_response_body.started request=<Request [b'POST']>
receive_response_body.complete
response_closed.started
response_closed.complete
Starting new HTTPS connection (1): litellm-logging.onrender.com:443
Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris?"}], 'model': 'gpt-3.5-turbo-1106', 'tool_choice': 'auto', 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}]}}
connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=600.0 socket_options=None
connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x10972d410>
start_tls.started ssl_context=<ssl.SSLContext object at 0x1090c5be0> server_hostname='api.openai.com' timeout=600.0
start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1097547d0>
send_request_headers.started request=<Request [b'POST']>
send_request_headers.complete
send_request_body.started request=<Request [b'POST']>
send_request_body.complete
receive_response_headers.started request=<Request [b'POST']>
https://litellm-logging.onrender.com:443 "POST /logging HTTP/1.1" 200 38
receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 23 Dec 2023 06:33:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-1106'), (b'openai-organization', b'reliablekeystest'), (b'openai-processing-ms', b'2145'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'9000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-limit-tokens_usage_based', b'1000000'), (b'x-ratelimit-remaining-requests', b'8998'), (b'x-ratelimit-remaining-tokens', b'999968'), (b'x-ratelimit-remaining-tokens_usage_based', b'999968'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-ratelimit-reset-tokens_usage_based', b'1ms'), (b'x-request-id', b'd0fd54d3a7696ee677f3690e9e0d6d04'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=P_4fUmw4vvrbGKTlavf9VWuuzzro87gvhLE0DEGKA84-1703313183-1-ARgz+AQXAzH1uTTK8iyPE3QnT8TovAP61UvYsFD+d5DWM0lFi5U2+eSgPH+Pqt+Y1fNH1FWBUn9DmVceJKvyLcU=; path=/; expires=Sat, 23-Dec-23 07:03:03 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=g.nvBthte.6BJ7KHg5tihyGwupeGfMNMGnw72QUUBQc-1703313183034-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'839e92128b7ff2e2-BOM'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
receive_response_body.started request=<Request [b'POST']>
receive_response_body.complete
response_closed.started
response_closed.complete
HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK"

View file

@@ -120,7 +120,7 @@ def test_langfuse_logging_async():
pytest.fail(f"An exception occurred - {e}")
test_langfuse_logging_async()
# test_langfuse_logging_async()
@pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -145,7 +145,7 @@ def test_langfuse_logging():
pytest.fail(f"An exception occurred - {e}")
test_langfuse_logging()
# test_langfuse_logging()
@pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -174,7 +174,7 @@ def test_langfuse_logging_stream():
print(e)
test_langfuse_logging_stream()
# test_langfuse_logging_stream()
@pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -297,4 +297,4 @@ def test_langfuse_logging_tool_calling():
tool_calls = response.choices[0].message.tool_calls
# test_langfuse_logging_tool_calling()
test_langfuse_logging_tool_calling()
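
These test hunks comment out the module-level direct calls in favor of skip markers, while re-enabling the tool-calling test's direct call at the bottom of the file. A hedged sketch of the resulting shape; the decorator placement and assertion are illustrative (the diff does not show whether this particular test carries the marker), and the tool spec is taken from the request logged above:

import pytest
import litellm

@pytest.mark.skip(reason="beta test - checking langfuse output")  # pattern from the hunks above
def test_langfuse_logging_tool_calling():
    tools = [{
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }]
    response = litellm.completion(
        model="gpt-3.5-turbo-1106",
        messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}],
        tools=tools,
        tool_choice="auto",
    )
    tool_calls = response.choices[0].message.tool_calls
    assert tool_calls is not None  # illustrative check; the real test inspects Langfuse output

test_langfuse_logging_tool_calling()  # direct call re-enabled, as in the last hunk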

View file

@@ -1052,6 +1052,7 @@ class Logging:
response_obj=result,
start_time=start_time,
end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
)
if callback == "cache" and litellm.cache is not None:
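
End to end, whatever the caller passes as user= now reaches the Langfuse trace as user_id. A minimal usage sketch; the user value is a placeholder and the LANGFUSE_* keys are assumed to be set in the environment:

import litellm

litellm.success_callback = ["langfuse"]  # route successful calls to the Langfuse logger

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a streaming test for llama2 + langfuse"}],
    user="end-user-123",  # placeholder; forwarded via kwargs["user"] as user_id above
)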