# litellm/tests/local_testing/test_alangfuse.py
import asyncio
import copy
import json
import logging
import os
import sys
from typing import Any
from unittest.mock import MagicMock, patch
logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm import completion
from litellm.caching import InMemoryCache
litellm.num_retries = 3
litellm.success_callback = ["langfuse"]
os.environ["LANGFUSE_DEBUG"] = "True"
import time
import pytest
@pytest.fixture
def langfuse_client():
import langfuse
_langfuse_cache_key = (
f"{os.environ['LANGFUSE_PUBLIC_KEY']}-{os.environ['LANGFUSE_SECRET_KEY']}"
)
    # use an in-memory langfuse client for testing; RAM util on CI/CD gets too high when we init many langfuse clients
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_langfuse_cache_key)
if _cached_client:
langfuse_client = _cached_client
else:
langfuse_client = langfuse.Langfuse(
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
host="https://us.cloud.langfuse.com",
)
litellm.in_memory_llm_clients_cache.set_cache(
key=_langfuse_cache_key,
value=langfuse_client,
)
print("NEW LANGFUSE CLIENT")
with patch(
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
) as mock_langfuse_client:
yield mock_langfuse_client()
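# A minimal usage sketch for the fixture above (an assumed test shape, not an actual
# test in this file): a test accepts `langfuse_client` as an argument, triggers a
# completion with a known trace_id, then flushes and queries Langfuse, e.g.:
#
#     async def test_example(langfuse_client):
#         await create_async_task(metadata={"trace_id": "my-trace-id"})  # hypothetical trace id
#         langfuse_client.flush()
#         trace = langfuse_client.get_trace(id="my-trace-id")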
def search_logs(log_file_path, num_good_logs=1):
"""
Searches the given log file for logs containing the "/api/public" string.
Parameters:
- log_file_path (str): The path to the log file to be searched.
Returns:
- None
Raises:
- Exception: If there are any bad logs found in the log file.
"""
import re
print("\n searching logs")
bad_logs = []
good_logs = []
all_logs = []
try:
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
print(f"searching logslines: {lines}")
for line in lines:
all_logs.append(line.strip())
if "/api/public" in line:
print("Found log with /api/public:")
print(line.strip())
print("\n\n")
match = re.search(
r'"POST /api/public/ingestion HTTP/1.1" (\d+) (\d+)',
line,
)
if match:
status_code = int(match.group(1))
print("STATUS CODE", status_code)
if (
status_code != 200
and status_code != 201
and status_code != 207
):
print("got a BAD log")
bad_logs.append(line.strip())
else:
good_logs.append(line.strip())
print("\nBad Logs")
print(bad_logs)
if len(bad_logs) > 0:
raise Exception(f"bad logs, Bad logs = {bad_logs}")
assert (
len(good_logs) == num_good_logs
), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
print("\nGood Logs")
print(good_logs)
if len(good_logs) <= 0:
raise Exception(
f"There were no Good Logs from Langfuse. No logs with /api/public status 200. \nAll logs:{all_logs}"
)
except Exception as e:
raise e
def pre_langfuse_setup():
"""
Set up the logging for the 'pre_langfuse_setup' function.
"""
# sends logs to langfuse.log
import logging
# Configure the logging to write to a file
logging.basicConfig(filename="langfuse.log", level=logging.DEBUG)
logger = logging.getLogger()
# Add a FileHandler to the logger
file_handler = logging.FileHandler("langfuse.log", mode="w")
file_handler.setLevel(logging.DEBUG)
logger.addHandler(file_handler)
return
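# Sketch of how pre_langfuse_setup() and search_logs() are meant to be combined
# (illustrative only; model and message values are placeholders): configure file
# logging first, make a call with the langfuse callback enabled, wait for the
# background flush, then verify every ingestion POST in langfuse.log returned 2xx:
#
#     pre_langfuse_setup()
#     litellm.success_callback = ["langfuse"]
#     litellm.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hi"}])
#     time.sleep(3)  # give the Langfuse batch exporter time to flush
#     search_logs("langfuse.log", num_good_logs=1)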
def test_langfuse_logging_async():
    # Measures the time overhead added by Langfuse logging callbacks vs. plain acompletion calls
try:
pre_langfuse_setup()
litellm.set_verbose = True
# Make 5 calls with an empty success_callback
litellm.success_callback = []
        time_empty_callback = asyncio.run(make_async_calls())
        print("done with no callback test")

        print("starting langfuse test")
        # Make 5 calls with success_callback set to "langfuse"
        litellm.success_callback = ["langfuse"]
        time_langfuse = asyncio.run(make_async_calls())
        print("done with langfuse test")

        # Compare the time for both scenarios
        print(f"Time taken with success_callback='langfuse': {time_langfuse}")
        print(f"Time taken with empty success_callback: {time_empty_callback}")

        # assert the diff is not more than 1 second - this was 5 seconds before the fix
        assert abs(time_langfuse - time_empty_callback) < 1
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
async def make_async_calls(metadata=None, **completion_kwargs):
    if metadata is not None:
        completion_kwargs["metadata"] = metadata
    tasks = []
    for _ in range(5):
        tasks.append(create_async_task(**completion_kwargs))
# Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete
responses = await asyncio.gather(*tasks)
# Print the responses when tasks return
for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time
return total_time
def create_async_task(**completion_kwargs):
"""
Creates an async task for the litellm.acompletion function.
This is just the task, but it is not run here.
To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
Any kwargs passed to this function will be passed to the litellm.acompletion function.
By default a standard set of arguments are used for the litellm.acompletion function.
"""
completion_args = {
"model": "azure/chatgpt-v-2",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test"}],
"max_tokens": 5,
"temperature": 0.7,
"timeout": 5,
"user": "langfuse_latency_test_user",
"mock_response": "It's simple to use and easy to get started",
}
completion_args.update(completion_kwargs)
return asyncio.create_task(litellm.acompletion(**completion_args))
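# Usage sketch for create_async_task (requires a running event loop, e.g. inside an
# async test; the kwargs shown are illustrative overrides of the defaults above):
#
#     task = create_async_task(model="gpt-3.5-turbo", metadata={"trace_id": "some-trace-id"})
#     response = await task          # or: await asyncio.gather(task, other_task)
#     # with stream=True, iterate the result: `async for chunk in await task: ...`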
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
@pytest.mark.flaky(retries=12, delay=2)
async def test_langfuse_logging_without_request_response(stream, langfuse_client):
try:
import uuid
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.turn_off_message_logging = True
litellm.success_callback = ["langfuse"]
response = await create_async_task(
model="gpt-3.5-turbo",
stream=stream,
metadata={"trace_id": _unique_trace_name},
)
print(response)
if stream:
async for chunk in response:
print(chunk)
langfuse_client.flush()
await asyncio.sleep(5)
# get trace with _unique_trace_name
trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
print("trace_from_langfuse", trace)
_trace_data = trace.data
if (
len(_trace_data) == 0
): # prevent infrequent list index out of range error from langfuse api
return
print(f"_trace_data: {_trace_data}")
assert _trace_data[0].input == {
"messages": [{"content": "redacted-by-litellm", "role": "user"}]
}
assert _trace_data[0].output == {
"role": "assistant",
"content": "redacted-by-litellm",
"function_call": None,
"tool_calls": None,
}
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
print(pwd)
file_path = os.path.join(pwd, "gettysburg.wav")
audio_file = open(file_path, "rb")
@pytest.mark.asyncio
@pytest.mark.flaky(retries=4, delay=2)
async def test_langfuse_logging_audio_transcriptions(langfuse_client):
"""
Test that creates a trace with masked input and output
"""
import uuid
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
await litellm.atranscription(
model="whisper-1",
file=audio_file,
metadata={
"trace_id": _unique_trace_name,
},
)
langfuse_client.flush()
await asyncio.sleep(20)
# get trace with _unique_trace_name
print("lookiing up trace", _unique_trace_name)
trace = langfuse_client.get_trace(id=_unique_trace_name)
generations = list(
reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data)
)
print("generations for given trace=", generations)
assert len(generations) == 1
assert generations[0].name == "litellm-atranscription"
assert generations[0].output is not None
@pytest.mark.asyncio
async def test_langfuse_masked_input_output(langfuse_client):
"""
Test that creates a trace with masked input and output
"""
import uuid
for mask_value in [True, False]:
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
response = await create_async_task(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "This is a test"}],
metadata={
"trace_id": _unique_trace_name,
"mask_input": mask_value,
"mask_output": mask_value,
},
mock_response="This is a test response",
)
print(response)
expected_input = "redacted-by-litellm" if mask_value else "This is a test"
expected_output = (
"redacted-by-litellm" if mask_value else "This is a test response"
)
langfuse_client.flush()
await asyncio.sleep(30)
# get trace with _unique_trace_name
trace = langfuse_client.get_trace(id=_unique_trace_name)
print("trace_from_langfuse", trace)
generations = list(
reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data)
)
assert expected_input in str(trace.input)
assert expected_output in str(trace.output)
if len(generations) > 0:
assert expected_input in str(generations[0].input)
assert expected_output in str(generations[0].output)
@pytest.mark.asyncio
@pytest.mark.flaky(retries=12, delay=2)
async def test_aaalangfuse_logging_metadata(langfuse_client):
"""
Test that creates multiple traces, with a varying number of generations and sets various metadata fields
Confirms that no metadata that is standard within Langfuse is duplicated in the respective trace or generation metadata
For trace continuation certain metadata of the trace is overriden with metadata from the last generation based on the update_trace_keys field
Version is set for both the trace and the generation
Release is just set for the trace
Tags is just set for the trace
"""
import uuid
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
trace_identifiers = {}
expected_filtered_metadata_keys = {
"trace_name",
"trace_id",
"existing_trace_id",
"trace_user_id",
"session_id",
"tags",
"generation_name",
"generation_id",
"prompt",
}
trace_metadata = {
"trace_actual_metadata_key": "trace_actual_metadata_value"
} # Allows for setting the metadata on the trace
run_id = str(uuid.uuid4())
session_id = f"litellm-test-session-{run_id}"
trace_common_metadata = {
"session_id": session_id,
"tags": ["litellm-test-tag1", "litellm-test-tag2"],
"update_trace_keys": [
"output",
"trace_metadata",
], # Overwrite the following fields in the trace with the last generation's output and the trace_user_id
"trace_metadata": trace_metadata,
"gen_metadata_key": "gen_metadata_value", # Metadata key that should not be filtered in the generation
"trace_release": "litellm-test-release",
"version": "litellm-test-version",
}
for trace_num in range(1, 3): # Two traces
metadata = copy.deepcopy(trace_common_metadata)
trace_id = f"litellm-test-trace{trace_num}-{run_id}"
metadata["trace_id"] = trace_id
metadata["trace_name"] = trace_id
trace_identifiers[trace_id] = []
print(f"Trace: {trace_id}")
for generation_num in range(
1, trace_num + 1
): # Each trace has a number of generations equal to its trace number
metadata["trace_user_id"] = f"litellm-test-user{generation_num}-{run_id}"
generation_id = (
f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}"
)
metadata["generation_id"] = generation_id
metadata["generation_name"] = generation_id
metadata["trace_metadata"][
"generation_id"
] = generation_id # Update to test if trace_metadata is overwritten by update trace keys
trace_identifiers[trace_id].append(generation_id)
print(f"Generation: {generation_id}")
response = await create_async_task(
model="gpt-3.5-turbo",
mock_response=f"{session_id}:{trace_id}:{generation_id}",
messages=[
{
"role": "user",
"content": f"{session_id}:{trace_id}:{generation_id}",
}
],
max_tokens=100,
temperature=0.2,
metadata=copy.deepcopy(
metadata
), # Every generation needs its own metadata, langfuse is not async/thread safe without it
)
print(response)
metadata["existing_trace_id"] = trace_id
await asyncio.sleep(2)
langfuse_client.flush()
await asyncio.sleep(4)
# Tests the metadata filtering and the override of the output to be the last generation
for trace_id, generation_ids in trace_identifiers.items():
try:
trace = langfuse_client.get_trace(id=trace_id)
except Exception as e:
if "not found within authorized project" in str(e):
print(f"Trace {trace_id} not found")
continue
assert trace.id == trace_id
assert trace.session_id == session_id
assert trace.metadata != trace_metadata
generations = list(
reversed(langfuse_client.get_generations(trace_id=trace_id).data)
)
assert len(generations) == len(generation_ids)
assert (
trace.input == generations[0].input
) # Should be set by the first generation
assert (
trace.output == generations[-1].output
) # Should be overwritten by the last generation according to update_trace_keys
        assert (
            trace.metadata != generations[-1].metadata
        )  # Trace metadata is set from the last generation's trace_metadata (per update_trace_keys), not the generation's own metadata
assert trace.metadata["generation_id"] == generations[-1].id
assert set(trace.tags).issuperset(trace_common_metadata["tags"])
print("trace_from_langfuse", trace)
for generation_id, generation in zip(generation_ids, generations):
assert generation.id == generation_id
assert generation.trace_id == trace_id
            print(
                "overlap between generation metadata keys and filtered keys (should be empty):",
                set(generation.metadata.keys()).intersection(
                    expected_filtered_metadata_keys
                ),
            )
assert set(generation.metadata.keys()).isdisjoint(
expected_filtered_metadata_keys
)
print("generation_from_langfuse", generation)
# test_langfuse_logging()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_stream():
try:
litellm.set_verbose = True
response = completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "this is a streaming test for llama2 + langfuse",
}
],
max_tokens=20,
temperature=0.2,
stream=True,
)
print(response)
for chunk in response:
pass
# print(chunk)
except litellm.Timeout as e:
pass
except Exception as e:
print(e)
# test_langfuse_logging_stream()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_custom_generation_name():
try:
litellm.set_verbose = True
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
max_tokens=10,
metadata={
"langfuse/foo": "bar",
"langsmith/fizz": "buzz",
"prompt_hash": "asdf98u0j9131123",
"generation_name": "ishaan-test-generation",
"generation_id": "gen-id22",
"trace_id": "trace-id22",
"trace_user_id": "user-id2",
},
)
print(response)
except litellm.Timeout as e:
pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
# test_langfuse_logging_custom_generation_name()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_embedding():
try:
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
response = litellm.embedding(
model="text-embedding-ada-002",
input=["gm", "ishaan"],
)
print(response)
except litellm.Timeout as e:
pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_function_calling():
litellm.set_verbose = True
function1 = [
{
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
}
]
try:
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "what's the weather in boston"}],
temperature=0.1,
functions=function1,
)
print(response)
except litellm.Timeout as e:
pass
except Exception as e:
print(e)
# test_langfuse_logging_function_calling()
@pytest.mark.skip(reason="Need to address this on main")
def test_aaalangfuse_existing_trace_id():
"""
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
Pass 1 logging object with a trace
Pass 2nd logging object with the trace id
Assert no changes to the trace
"""
# Test - if the logs were sent to the correct team on langfuse
import datetime
import litellm
from litellm.integrations.langfuse.langfuse import LangFuseLogger
langfuse_Logger = LangFuseLogger(
langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
litellm.success_callback = ["langfuse"]
# langfuse_args = {'kwargs': { 'start_time': 'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
### NEW TRACE ###
message = [{"role": "user", "content": "what's the weather in boston"}]
langfuse_args = {
"response_obj": response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": None,
"model_alias_map": {},
"completion_call_id": None,
"metadata": None,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": message,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": None,
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
import langfuse
langfuse_client = langfuse.Langfuse(
public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
trace_id = langfuse_response_object["trace_id"]
assert trace_id is not None
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)
### EXISTING TRACE ###
new_metadata = {"existing_trace_id": trace_id}
new_messages = [{"role": "user", "content": "What do you know?"}]
new_response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="What do I know?",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
langfuse_args = {
"response_obj": new_response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"model_alias_map": {},
"completion_call_id": None,
"metadata": new_metadata,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": new_messages,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
new_trace_id = langfuse_response_object["trace_id"]
assert new_trace_id == trace_id
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
new_langfuse_trace = langfuse_client.get_trace(id=trace_id)
initial_langfuse_trace_dict = dict(initial_langfuse_trace)
initial_langfuse_trace_dict.pop("updatedAt")
initial_langfuse_trace_dict.pop("timestamp")
new_langfuse_trace_dict = dict(new_langfuse_trace)
new_langfuse_trace_dict.pop("updatedAt")
new_langfuse_trace_dict.pop("timestamp")
assert initial_langfuse_trace_dict == new_langfuse_trace_dict
@pytest.mark.skipif(
condition=not os.environ.get("OPENAI_API_KEY", False),
reason="Authentication missing for openai",
)
def test_langfuse_logging_tool_calling():
litellm.set_verbose = True
def get_current_weather(location, unit="fahrenheit"):
"""Get the current weather in a given location"""
if "tokyo" in location.lower():
return json.dumps(
{"location": "Tokyo", "temperature": "10", "unit": "celsius"}
)
elif "san francisco" in location.lower():
return json.dumps(
{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
)
elif "paris" in location.lower():
return json.dumps(
{"location": "Paris", "temperature": "22", "unit": "celsius"}
)
else:
return json.dumps({"location": location, "temperature": "unknown"})
messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris?",
}
]
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
response = litellm.completion(
model="gpt-3.5-turbo-1106",
messages=messages,
tools=tools,
tool_choice="auto", # auto is default, but we'll be explicit
)
print("\nLLM Response1:\n", response)
response_message = response.choices[0].message
tool_calls = response.choices[0].message.tool_calls
# test_langfuse_logging_tool_calling()
def get_langfuse_prompt(name: str):
    from langfuse import Langfuse

    try:
        langfuse_client = Langfuse(
            public_key=os.environ["LANGFUSE_DEV_PUBLIC_KEY"],
            secret_key=os.environ["LANGFUSE_DEV_SK_KEY"],
            host=os.environ["LANGFUSE_HOST"],
        )

        # Get current production version of a text prompt
        prompt = langfuse_client.get_prompt(name=name)
        return prompt
    except Exception as e:
        raise Exception(f"Error getting prompt: {e}")
@pytest.mark.asyncio
@pytest.mark.skip(
reason="local only test, use this to verify if we can send request to litellm proxy server"
)
async def test_make_request():
response = await litellm.acompletion(
model="openai/llama3",
api_key="sk-1234",
base_url="http://localhost:4000",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
extra_body={
"metadata": {
"tags": ["openai"],
"prompt": get_langfuse_prompt("test-chat"),
}
},
)
@pytest.mark.skip(
reason="local only test, use this to verify if dynamic langfuse logging works as expected"
)
def test_aaalangfuse_dynamic_logging():
"""
pass in langfuse credentials via completion call
assert call is logged.
Covers the team-logging scenario.
"""
import uuid
import langfuse
trace_id = str(uuid.uuid4())
_ = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey"}],
mock_response="Hey! how's it going?",
langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
langfuse_secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
metadata={"trace_id": trace_id},
success_callback=["langfuse"],
)
time.sleep(3)
langfuse_client = langfuse.Langfuse(
public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
langfuse_client.get_trace(id=trace_id)
import datetime
generation_params = {
"name": "litellm-acompletion",
"id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX",
"start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778),
"end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141),
"model": "gpt-4o",
"model_parameters": {
"stream": False,
"max_retries": 0,
"extra_body": "{}",
"system_fingerprint": "fp_52a7f40b0b",
},
"input": {
"messages": [
{"content": "<>", "role": "system"},
{"content": "<>", "role": "user"},
]
},
"output": {
"content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?",
"role": "assistant",
"tool_calls": None,
"function_call": None,
},
"usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038},
"metadata": {
"prompt": {
"name": "conversational-service-answer_question_restricted_reply",
"version": 9,
"config": {},
"labels": ["latest", "staging", "production"],
"tags": ["conversational-service"],
"prompt": [
{"role": "system", "content": "<>"},
{"role": "user", "content": "{{text}}"},
],
},
"requester_metadata": {
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
"trace_name": "jess",
"tags": ["conversational-service", "generative-ai-engine", "staging"],
"prompt": {
"name": "conversational-service-answer_question_restricted_reply",
"version": 9,
"config": {},
"labels": ["latest", "staging", "production"],
"tags": ["conversational-service"],
"prompt": [
{"role": "system", "content": "<>"},
{"role": "user", "content": "{{text}}"},
],
},
},
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"litellm_api_version": "0.0.0",
"user_api_key_user_id": "default_user_id",
"user_api_key_spend": 0.0,
"user_api_key_metadata": {},
"requester_ip_address": "127.0.0.1",
"model_group": "gpt-4o",
"model_group_size": 0,
"deployment": "gpt-4o",
"model_info": {
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
"db_model": False,
},
"hidden_params": {
"headers": {
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
"content-type": "application/json",
"transfer-encoding": "chunked",
"connection": "keep-alive",
"access-control-expose-headers": "X-Request-ID",
"openai-organization": "reliablekeystest",
"openai-processing-ms": "329",
"openai-version": "2020-10-01",
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
"x-ratelimit-limit-requests": "10000",
"x-ratelimit-limit-tokens": "30000000",
"x-ratelimit-remaining-requests": "9999",
"x-ratelimit-remaining-tokens": "29999980",
"x-ratelimit-reset-requests": "6ms",
"x-ratelimit-reset-tokens": "0s",
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
"cf-cache-status": "DYNAMIC",
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
"x-content-type-options": "nosniff",
"server": "cloudflare",
"cf-ray": "8c8cc573becb232c-SJC",
"content-encoding": "gzip",
"alt-svc": 'h3=":443"; ma=86400',
},
"additional_headers": {
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
"llm_provider-content-type": "application/json",
"llm_provider-transfer-encoding": "chunked",
"llm_provider-connection": "keep-alive",
"llm_provider-access-control-expose-headers": "X-Request-ID",
"llm_provider-openai-organization": "reliablekeystest",
"llm_provider-openai-processing-ms": "329",
"llm_provider-openai-version": "2020-10-01",
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
"llm_provider-x-ratelimit-limit-requests": "10000",
"llm_provider-x-ratelimit-limit-tokens": "30000000",
"llm_provider-x-ratelimit-remaining-requests": "9999",
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
"llm_provider-x-ratelimit-reset-requests": "6ms",
"llm_provider-x-ratelimit-reset-tokens": "0s",
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
"llm_provider-cf-cache-status": "DYNAMIC",
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
"llm_provider-x-content-type-options": "nosniff",
"llm_provider-server": "cloudflare",
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
"llm_provider-content-encoding": "gzip",
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
},
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
"api_base": "https://api.openai.com",
"optional_params": {
"stream": False,
"max_retries": 0,
"extra_body": {},
},
"response_cost": 0.00038,
},
"litellm_response_cost": 0.00038,
"api_base": "https://api.openai.com/v1/",
"cache_hit": False,
},
"level": "DEFAULT",
"version": None,
}
@pytest.mark.parametrize(
"prompt",
[
[
{"role": "system", "content": "<>"},
{"role": "user", "content": "{{text}}"},
],
"hello world",
],
)
def test_langfuse_prompt_type(prompt):
from litellm.integrations.langfuse.langfuse import _add_prompt_to_generation_params
clean_metadata = {
"prompt": {
"name": "conversational-service-answer_question_restricted_reply",
"version": 9,
"config": {},
"labels": ["latest", "staging", "production"],
"tags": ["conversational-service"],
"prompt": prompt,
},
"requester_metadata": {
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
"trace_name": "jess",
"tags": ["conversational-service", "generative-ai-engine", "staging"],
"prompt": {
"name": "conversational-service-answer_question_restricted_reply",
"version": 9,
"config": {},
"labels": ["latest", "staging", "production"],
"tags": ["conversational-service"],
"prompt": [
{"role": "system", "content": "<>"},
{"role": "user", "content": "{{text}}"},
],
},
},
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"litellm_api_version": "0.0.0",
"user_api_key_user_id": "default_user_id",
"user_api_key_spend": 0.0,
"user_api_key_metadata": {},
"requester_ip_address": "127.0.0.1",
"model_group": "gpt-4o",
"model_group_size": 0,
"deployment": "gpt-4o",
"model_info": {
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
"db_model": False,
},
"hidden_params": {
"headers": {
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
"content-type": "application/json",
"transfer-encoding": "chunked",
"connection": "keep-alive",
"access-control-expose-headers": "X-Request-ID",
"openai-organization": "reliablekeystest",
"openai-processing-ms": "329",
"openai-version": "2020-10-01",
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
"x-ratelimit-limit-requests": "10000",
"x-ratelimit-limit-tokens": "30000000",
"x-ratelimit-remaining-requests": "9999",
"x-ratelimit-remaining-tokens": "29999980",
"x-ratelimit-reset-requests": "6ms",
"x-ratelimit-reset-tokens": "0s",
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
"cf-cache-status": "DYNAMIC",
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
"x-content-type-options": "nosniff",
"server": "cloudflare",
"cf-ray": "8c8cc573becb232c-SJC",
"content-encoding": "gzip",
"alt-svc": 'h3=":443"; ma=86400',
},
"additional_headers": {
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
"llm_provider-content-type": "application/json",
"llm_provider-transfer-encoding": "chunked",
"llm_provider-connection": "keep-alive",
"llm_provider-access-control-expose-headers": "X-Request-ID",
"llm_provider-openai-organization": "reliablekeystest",
"llm_provider-openai-processing-ms": "329",
"llm_provider-openai-version": "2020-10-01",
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
"llm_provider-x-ratelimit-limit-requests": "10000",
"llm_provider-x-ratelimit-limit-tokens": "30000000",
"llm_provider-x-ratelimit-remaining-requests": "9999",
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
"llm_provider-x-ratelimit-reset-requests": "6ms",
"llm_provider-x-ratelimit-reset-tokens": "0s",
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
"llm_provider-cf-cache-status": "DYNAMIC",
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
"llm_provider-x-content-type-options": "nosniff",
"llm_provider-server": "cloudflare",
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
"llm_provider-content-encoding": "gzip",
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
},
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
"api_base": "https://api.openai.com",
"optional_params": {"stream": False, "max_retries": 0, "extra_body": {}},
"response_cost": 0.00038,
},
"litellm_response_cost": 0.00038,
"api_base": "https://api.openai.com/v1/",
"cache_hit": False,
}
_add_prompt_to_generation_params(
generation_params=generation_params, clean_metadata=clean_metadata
)
def test_langfuse_logging_metadata():
from litellm.integrations.langfuse.langfuse import log_requester_metadata
metadata = {"key": "value", "requester_metadata": {"key": "value"}}
got_metadata = log_requester_metadata(clean_metadata=metadata)
expected_metadata = {"requester_metadata": {"key": "value"}}
assert expected_metadata == got_metadata