Litellm dev 01 07 2025 p3 (#7635)

* fix(__init__.py): fix mistral large tool calling

map bedrock mistral large to converse endpoint

Fixes https://github.com/BerriAI/litellm/issues/7521

* braintrust logging: respect project_id, add more metrics + more (#7613)

* braintrust logging: respect project_id, add more metrics

* braintrust logger: improve json formatting

* braintrust logger: add test for passing specific project_id

* rm unneeded import

* braintrust logging: rm unneeded var in tests

* add project_name

* update docs

---------

Co-authored-by: H <no@email.com>

---------

Co-authored-by: hi019 <65871571+hi019@users.noreply.github.com>
Co-authored-by: H <no@email.com>
Author: Krish Dholakia (committed by GitHub)
Date: 2025-01-08 11:46:24 -08:00
parent 07c5f136f1
commit a187cee538
5 changed files with 140 additions and 72 deletions

View file

@@ -67,7 +67,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```

-## Advanced - pass Project ID
+## Advanced - pass Project ID or name

 <Tabs>
 <TabItem value="sdk" label="SDK">
@@ -79,7 +79,10 @@ response = litellm.completion(
         {"role": "user", "content": "Hi 👋 - i'm openai"}
     ],
     metadata={
-        "project_id": "my-special-project"
+        "project_id": "1234",
+        # passing project_name will try to find a project with that name, or create one if it doesn't exist
+        # if both project_id and project_name are passed, project_id will be used
+        # "project_name": "my-special-project"
     }
)
```
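The docs above use the SDK; the proxy route from the earlier curl example accepts the same metadata through any OpenAI-compatible client. A minimal sketch, assuming a LiteLLM proxy on 0.0.0.0:4000 with the braintrust callback enabled and that the proxy forwards request metadata to callbacks; the key sk-1234 and the project name are placeholders:

```python
# Sketch: pass the Braintrust project through the LiteLLM proxy.
# Assumes the proxy forwards the request `metadata` to logging callbacks;
# "sk-1234" and "my-special-project" are placeholders.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
    extra_body={"metadata": {"project_name": "my-special-project"}},
)
```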

View file

@@ -425,6 +425,7 @@ BEDROCK_CONVERSE_MODELS = [
     "meta.llama3-1-405b-instruct-v1:0",
     "meta.llama3-70b-instruct-v1:0",
     "mistral.mistral-large-2407-v1:0",
+    "mistral.mistral-large-2402-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
     "meta.llama3-2-3b-instruct-v1:0",
     "meta.llama3-2-11b-instruct-v1:0",

View file

@@ -4,7 +4,7 @@
 import copy
 import os
 from datetime import datetime
-from typing import Optional
+from typing import Optional, Dict

 import httpx
 from pydantic import BaseModel
@@ -19,9 +19,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.utils import print_verbose

-global_braintrust_http_handler = get_async_httpx_client(
-    llm_provider=httpxSpecialProvider.LoggingCallback
-)
+global_braintrust_http_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback)
 global_braintrust_sync_http_handler = HTTPHandler()

 API_BASE = "https://api.braintrustdata.com/v1"
@@ -37,9 +35,7 @@ def get_utc_datetime():

 class BraintrustLogger(CustomLogger):
-    def __init__(
-        self, api_key: Optional[str] = None, api_base: Optional[str] = None
-    ) -> None:
+    def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None) -> None:
         super().__init__()
         self.validate_environment(api_key=api_key)
         self.api_base = api_base or API_BASE
@@ -49,6 +45,7 @@ class BraintrustLogger(CustomLogger):
             "Authorization": "Bearer " + self.api_key,
             "Content-Type": "application/json",
         }
+        self._project_id_cache: Dict[str, str] = {}  # Cache mapping project names to IDs

     def validate_environment(self, api_key: Optional[str]):
         """
@@ -64,6 +61,43 @@ class BraintrustLogger(CustomLogger):
         if len(missing_keys) > 0:
             raise Exception("Missing keys={} in environment.".format(missing_keys))

+    def get_project_id_sync(self, project_name: str) -> str:
+        """
+        Get project ID from name, using cache if available.
+        If project doesn't exist, creates it.
+        """
+        if project_name in self._project_id_cache:
+            return self._project_id_cache[project_name]
+
+        try:
+            response = global_braintrust_sync_http_handler.post(
+                f"{self.api_base}/project", headers=self.headers, json={"name": project_name}
+            )
+            project_dict = response.json()
+            project_id = project_dict["id"]
+            self._project_id_cache[project_name] = project_id
+            return project_id
+        except httpx.HTTPStatusError as e:
+            raise Exception(f"Failed to register project: {e.response.text}")
+
+    async def get_project_id_async(self, project_name: str) -> str:
+        """
+        Async version of get_project_id_sync
+        """
+        if project_name in self._project_id_cache:
+            return self._project_id_cache[project_name]
+
+        try:
+            response = await global_braintrust_http_handler.post(
+                f"{self.api_base}/project/register", headers=self.headers, json={"name": project_name}
+            )
+            project_dict = response.json()
+            project_id = project_dict["id"]
+            self._project_id_cache[project_name] = project_id
+            return project_id
+        except httpx.HTTPStatusError as e:
+            raise Exception(f"Failed to register project: {e.response.text}")
+
     @staticmethod
     def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
         """
@@ -82,21 +116,15 @@ class BraintrustLogger(CustomLogger):
         if metadata is None:
             metadata = {}

-        proxy_headers = (
-            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
-        )
+        proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}

         for metadata_param_key in proxy_headers:
             if metadata_param_key.startswith("braintrust"):
                 trace_param_key = metadata_param_key.replace("braintrust", "", 1)
                 if trace_param_key in metadata:
-                    verbose_logger.warning(
-                        f"Overwriting Braintrust `{trace_param_key}` from request header"
-                    )
+                    verbose_logger.warning(f"Overwriting Braintrust `{trace_param_key}` from request header")
                 else:
-                    verbose_logger.debug(
-                        f"Found Braintrust `{trace_param_key}` in request header"
-                    )
+                    verbose_logger.debug(f"Found Braintrust `{trace_param_key}` in request header")
                 metadata[trace_param_key] = proxy_headers.get(metadata_param_key)

         return metadata
@@ -125,42 +153,28 @@
         verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
         try:
             litellm_call_id = kwargs.get("litellm_call_id")
-            project_id = kwargs.get("project_id", None)
-            if project_id is None:
-                if self.default_project_id is None:
-                    self.create_sync_default_project_and_experiment()
-                project_id = self.default_project_id
             prompt = {"messages": kwargs.get("messages")}
             output = None
+            choices = []
             if response_obj is not None and (
-                kwargs.get("call_type", None) == "embedding"
-                or isinstance(response_obj, litellm.EmbeddingResponse)
+                kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
             ):
                 output = None
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.ModelResponse
-            ):
+            elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
                 output = response_obj["choices"][0]["message"].json()
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.TextCompletionResponse
-            ):
+                choices = response_obj["choices"]
+            elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
                 output = response_obj.choices[0].text
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.ImageResponse
-            ):
+                choices = response_obj.choices
+            elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
                 output = response_obj["data"]

             litellm_params = kwargs.get("litellm_params", {})
-            metadata = (
-                litellm_params.get("metadata", {}) or {}
-            )  # if litellm_params['metadata'] == None
+            metadata = litellm_params.get("metadata", {}) or {}  # if litellm_params['metadata'] == None
             metadata = self.add_metadata_from_header(litellm_params, metadata)
             clean_metadata = {}
             try:
-                metadata = copy.deepcopy(
-                    metadata
-                )  # Avoid modifying the original metadata
+                metadata = copy.deepcopy(metadata)  # Avoid modifying the original metadata
             except Exception:
                 new_metadata = {}
                 for key, value in metadata.items():
@@ -174,10 +188,20 @@ class BraintrustLogger(CustomLogger):
                         new_metadata[key] = copy.deepcopy(value)
                 metadata = new_metadata

+            # Get project_id from metadata or create default if needed
+            project_id = metadata.get("project_id")
+            if project_id is None:
+                project_name = metadata.get("project_name")
+                project_id = self.get_project_id_sync(project_name) if project_name else None
+
+            if project_id is None:
+                if self.default_project_id is None:
+                    self.create_sync_default_project_and_experiment()
+                project_id = self.default_project_id
+
             tags = []
             if isinstance(metadata, dict):
                 for key, value in metadata.items():
                     # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy
                     if (
                         litellm.langfuse_default_tags is not None
@@ -210,22 +234,28 @@
                 "completion_tokens": usage_obj.completion_tokens,
                 "total_tokens": usage_obj.total_tokens,
                 "total_cost": cost,
+                "time_to_first_token": end_time.timestamp() - start_time.timestamp(),
+                "start": start_time.timestamp(),
+                "end": end_time.timestamp(),
             }

             request_data = {
                 "id": litellm_call_id,
-                "input": prompt,
-                "output": output,
+                "input": prompt["messages"],
                 "metadata": clean_metadata,
                 "tags": tags,
                 "span_attributes": {"name": "Chat Completion", "type": "llm"},
             }
+            if choices is not None:
+                request_data["output"] = [choice.dict() for choice in choices]
+            else:
+                request_data["output"] = output
+
+            if metrics is not None:
+                request_data["metrics"] = metrics

             try:
-                print_verbose(
-                    f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
-                )
+                print_verbose(f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}")
                 global_braintrust_sync_http_handler.post(
                     url=f"{self.api_base}/project_logs/{project_id}/insert",
                     json={"events": [request_data]},
@@ -242,36 +272,24 @@
         verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
         try:
             litellm_call_id = kwargs.get("litellm_call_id")
-            project_id = kwargs.get("project_id", None)
-            if project_id is None:
-                if self.default_project_id is None:
-                    await self.create_default_project_and_experiment()
-                project_id = self.default_project_id
             prompt = {"messages": kwargs.get("messages")}
             output = None
+            choices = []
             if response_obj is not None and (
-                kwargs.get("call_type", None) == "embedding"
-                or isinstance(response_obj, litellm.EmbeddingResponse)
+                kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
             ):
                 output = None
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.ModelResponse
-            ):
+            elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
                 output = response_obj["choices"][0]["message"].json()
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.TextCompletionResponse
-            ):
+                choices = response_obj["choices"]
+            elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
                 output = response_obj.choices[0].text
-            elif response_obj is not None and isinstance(
-                response_obj, litellm.ImageResponse
-            ):
+                choices = response_obj.choices
+            elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
                 output = response_obj["data"]

             litellm_params = kwargs.get("litellm_params", {})
-            metadata = (
-                litellm_params.get("metadata", {}) or {}
-            )  # if litellm_params['metadata'] == None
+            metadata = litellm_params.get("metadata", {}) or {}  # if litellm_params['metadata'] == None
             metadata = self.add_metadata_from_header(litellm_params, metadata)
             clean_metadata = {}
             new_metadata = {}
@@ -291,12 +309,20 @@ class BraintrustLogger(CustomLogger):
                             value[k] = v.isoformat()
                     new_metadata[key] = value
                 metadata = new_metadata

+            # Get project_id from metadata or create default if needed
+            project_id = metadata.get("project_id")
+            if project_id is None:
+                project_name = metadata.get("project_name")
+                project_id = await self.get_project_id_async(project_name) if project_name else None
+
+            if project_id is None:
+                if self.default_project_id is None:
+                    await self.create_default_project_and_experiment()
+                project_id = self.default_project_id
+
             tags = []
             if isinstance(metadata, dict):
                 for key, value in metadata.items():
                     # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy
                     if (
                         litellm.langfuse_default_tags is not None
@@ -329,15 +355,31 @@
                 "completion_tokens": usage_obj.completion_tokens,
                 "total_tokens": usage_obj.total_tokens,
                 "total_cost": cost,
+                "start": start_time.timestamp(),
+                "end": end_time.timestamp(),
             }
+
+            api_call_start_time = kwargs.get("api_call_start_time")
+            completion_start_time = kwargs.get("completion_start_time")
+            if api_call_start_time is not None and completion_start_time is not None:
+                metrics["time_to_first_token"] = completion_start_time.timestamp() - api_call_start_time.timestamp()

             request_data = {
                 "id": litellm_call_id,
-                "input": prompt,
+                "input": prompt["messages"],
                 "output": output,
                 "metadata": clean_metadata,
                 "tags": tags,
                 "span_attributes": {"name": "Chat Completion", "type": "llm"},
             }
+            if choices is not None:
+                request_data["output"] = [choice.dict() for choice in choices]
+            else:
+                request_data["output"] = output
+
+            if metrics is not None:
+                request_data["metrics"] = metrics

View file

@@ -26,8 +26,6 @@ model_list:
       revision: main
       auth_token: os.environ/HUGGINGFACE_API_KEY
 litellm_settings:
-  service_callback: ["prometheus_system"]
-  callbacks: ["prometheus"]
   cache: true

View file

@@ -51,3 +51,27 @@ def test_braintrust_logging():
         time.sleep(2)

         mock_client.assert_called()
+
+
+def test_braintrust_logging_specific_project_id():
+    import litellm
+
+    litellm.set_verbose = True
+
+    with patch.object(
+        litellm.integrations.braintrust_logging.global_braintrust_sync_http_handler,
+        "post",
+        new=MagicMock(),
+    ) as mock_client:
+        # set braintrust as a callback, litellm will send the data to braintrust
+        litellm.callbacks = ["braintrust"]
+
+        response = litellm.completion(
+            model="openai/gpt-4o",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            metadata={"project_id": "123"},
+        )
+
+        time.sleep(2)
+
+        # Check that the log was inserted into the correct project
+        mock_client.assert_called()
+        _, kwargs = mock_client.call_args
+        assert 'url' in kwargs
+        assert kwargs['url'] == "https://api.braintrustdata.com/v1/project_logs/123/insert"
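A companion test for the project_name path could follow the same pattern. A hedged sketch, not part of this commit, assuming it lives in the same test file (so patch, MagicMock, and time are already imported) and that API keys are set as in the test above; the stubbed project ID 456 is made up:

```python
def test_braintrust_logging_specific_project_name():
    import litellm

    litellm.set_verbose = True

    with patch.object(
        litellm.integrations.braintrust_logging.global_braintrust_sync_http_handler,
        "post",
        new=MagicMock(),
    ) as mock_client:
        # stub the project-create response so get_project_id_sync resolves "456"
        mock_client.return_value.json.return_value = {"id": "456"}
        litellm.callbacks = ["braintrust"]

        litellm.completion(
            model="openai/gpt-4o",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            metadata={"project_name": "my-special-project"},
        )

        time.sleep(2)

        # the last call should insert the log under the resolved project ID
        _, kwargs = mock_client.call_args
        assert kwargs['url'] == "https://api.braintrustdata.com/v1/project_logs/456/insert"
```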