import datetime
import json
import os
import secrets
import traceback
from datetime import datetime as dt
from typing import Optional

from pydantic import BaseModel

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
from litellm.proxy.utils import PrismaClient, hash_token


def _is_master_key(api_key: str, _master_key: Optional[str]) -> bool:
    """
    Return True if `api_key` matches the proxy master key, either as the raw
    string or as its hash (constant-time comparison in both cases).
    """
    if _master_key is None:
        return False

    ## string comparison
    is_master_key = secrets.compare_digest(api_key, _master_key)
    if is_master_key:
        return True

    ## hash comparison
    is_master_key = secrets.compare_digest(api_key, hash_token(_master_key))
    if is_master_key:
        return True

    return False
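# Illustrative checks (not part of the module): `_is_master_key` accepts either
# the raw master key or its hash, since the incoming key may already be hashed.
#
#   _is_master_key(api_key="sk-1234", _master_key="sk-1234")              # True
#   _is_master_key(api_key=hash_token("sk-1234"), _master_key="sk-1234")  # True
#   _is_master_key(api_key="sk-other", _master_key="sk-1234")             # False
#   _is_master_key(api_key="sk-1234", _master_key=None)                   # False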
def get_logging_payload(
    kwargs, response_obj, start_time, end_time, end_user_id: Optional[str]
) -> SpendLogsPayload:
    """
    Build the SpendLogsPayload (one row of the SpendLogs table) for a
    completed LLM call from its kwargs and response object.
    """
    from pydantic import Json

    from litellm.proxy._types import LiteLLM_SpendLogs
    from litellm.proxy.proxy_server import general_settings, master_key

    verbose_proxy_logger.debug(
        f"SpendTable: get_logging_payload - kwargs: {kwargs}\n\n"
    )

    if kwargs is None:
        kwargs = {}
    if response_obj is None:
        response_obj = {}
    # standardize this function to be used across s3, dynamoDB and langfuse logging
    litellm_params = kwargs.get("litellm_params", {})
    metadata = (
        litellm_params.get("metadata", {}) or {}
    )  # guard against litellm_params["metadata"] == None
    completion_start_time = kwargs.get("completion_start_time", end_time)
    call_type = kwargs.get("call_type")
    cache_hit = kwargs.get("cache_hit", False)
    usage = response_obj.get("usage", None) or {}
    if isinstance(usage, litellm.Usage):
        usage = dict(usage)
    id = response_obj.get("id", kwargs.get("litellm_call_id"))
    api_key = metadata.get("user_api_key", "")
    if api_key is not None and isinstance(api_key, str):
        if api_key.startswith("sk-"):
            # hash the api_key
            api_key = hash_token(api_key)
        if (
            _is_master_key(api_key=api_key, _master_key=master_key)
            and general_settings.get("disable_adding_master_key_hash_to_db") is True
        ):
            api_key = "litellm_proxy_master_key"  # use a known alias, if the user disabled storing master key in db

    _model_id = metadata.get("model_info", {}).get("id", "")
    _model_group = metadata.get("model_group", "")

    request_tags = (
        json.dumps(metadata.get("tags", []))
        if isinstance(metadata.get("tags", []), list)
        else "[]"
    )
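    # Example (illustrative): metadata == {"tags": ["prod", "batch"]} serializes
    # to request_tags == '["prod", "batch"]'; any non-list `tags` value falls
    # back to the empty JSON list "[]".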
    # clean up litellm metadata
    clean_metadata = SpendLogsMetadata(
        user_api_key=None,
        user_api_key_alias=None,
        user_api_key_team_id=None,
        user_api_key_user_id=None,
        user_api_key_team_alias=None,
        spend_logs_metadata=None,
        requester_ip_address=None,
        additional_usage_values=None,
    )
    if isinstance(metadata, dict):
        verbose_proxy_logger.debug(
            "getting payload for SpendLogs, available keys in metadata: "
            + str(list(metadata.keys()))
        )

        # Filter the metadata dictionary to include only the specified keys
        clean_metadata = SpendLogsMetadata(
            **{  # type: ignore
                key: metadata[key]
                for key in SpendLogsMetadata.__annotations__.keys()
                if key in metadata
            }
        )

    special_usage_fields = ["completion_tokens", "prompt_tokens", "total_tokens"]
    additional_usage_values = {}
    for k, v in usage.items():
        if k not in special_usage_fields:
            if isinstance(v, BaseModel):
                v = v.model_dump()
            additional_usage_values.update({k: v})
    clean_metadata["additional_usage_values"] = additional_usage_values
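    # Example (illustrative): for usage == {"prompt_tokens": 5,
    # "completion_tokens": 10, "total_tokens": 15,
    # "completion_tokens_details": {"reasoning_tokens": 2}}, only
    # `completion_tokens_details` lands in additional_usage_values; the three
    # standard token counts get their own columns in the payload below.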
    if litellm.cache is not None:
        cache_key = litellm.cache.get_cache_key(**kwargs)
    else:
        cache_key = "Cache OFF"
    if cache_hit is True:
        import time

        id = f"{id}_cache_hit{time.time()}"  # SpendLogs does not allow duplicate request_id
    try:
        payload: SpendLogsPayload = SpendLogsPayload(
            request_id=str(id),
            call_type=call_type or "",
            api_key=str(api_key),
            cache_hit=str(cache_hit),
            startTime=start_time,
            endTime=end_time,
            completionStartTime=completion_start_time,
            model=kwargs.get("model", "") or "",
            user=kwargs.get("litellm_params", {})
            .get("metadata", {})
            .get("user_api_key_user_id", "")
            or "",
            team_id=kwargs.get("litellm_params", {})
            .get("metadata", {})
            .get("user_api_key_team_id", "")
            or "",
            metadata=json.dumps(clean_metadata),
            cache_key=cache_key,
            spend=kwargs.get("response_cost", 0),
            total_tokens=usage.get("total_tokens", 0),
            prompt_tokens=usage.get("prompt_tokens", 0),
            completion_tokens=usage.get("completion_tokens", 0),
            request_tags=request_tags,
            end_user=end_user_id or "",
            api_base=litellm_params.get("api_base", ""),
            model_group=_model_group,
            model_id=_model_id,
            requester_ip_address=clean_metadata.get("requester_ip_address", None),
        )

        verbose_proxy_logger.debug(
            "SpendTable: created payload - payload: %s\n\n", payload
        )

        return payload
    except Exception as e:
        verbose_proxy_logger.exception(
            "Error creating spendlogs object - {}".format(str(e))
        )
        raise e
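# A minimal caller sketch (hypothetical values, for illustration only; in the
# proxy the logging callbacks assemble these arguments from the live request):
#
#   payload = get_logging_payload(
#       kwargs={"model": "gpt-4o", "litellm_params": {"metadata": {}}, "response_cost": 0.02},
#       response_obj={"id": "chatcmpl-123", "usage": {"total_tokens": 15}},
#       start_time=dt.now(),
#       end_time=dt.now(),
#       end_user_id="customer-1",
#   )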
async def get_spend_by_team_and_customer(
    start_date: dt,
    end_date: dt,
    team_id: str,
    customer_id: str,
    prisma_client: PrismaClient,
):
    """
    Return daily aggregated spend for the given team_id + customer_id from
    LiteLLM_SpendLogs, broken down by model and api_key within each day.
    """
    sql_query = """
    WITH SpendByModelApiKey AS (
        SELECT
            date_trunc('day', sl."startTime") AS group_by_day,
            COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
            sl.end_user AS customer,
            sl.model,
            sl.api_key,
            SUM(sl.spend) AS model_api_spend,
            SUM(sl.total_tokens) AS model_api_tokens
        FROM
            "LiteLLM_SpendLogs" sl
        LEFT JOIN
            "LiteLLM_TeamTable" tt
        ON
            sl.team_id = tt.team_id
        WHERE
            sl."startTime" BETWEEN $1::date AND $2::date
            AND sl.team_id = $3
            AND sl.end_user = $4
        GROUP BY
            date_trunc('day', sl."startTime"),
            tt.team_alias,
            sl.end_user,
            sl.model,
            sl.api_key
    )
    SELECT
        group_by_day,
        jsonb_agg(jsonb_build_object(
            'team_name', team_name,
            'customer', customer,
            'total_spend', total_spend,
            'metadata', metadata
        )) AS teams_customers
    FROM (
        SELECT
            group_by_day,
            team_name,
            customer,
            SUM(model_api_spend) AS total_spend,
            jsonb_agg(jsonb_build_object(
                'model', model,
                'api_key', api_key,
                'spend', model_api_spend,
                'total_tokens', model_api_tokens
            )) AS metadata
        FROM
            SpendByModelApiKey
        GROUP BY
            group_by_day,
            team_name,
            customer
    ) AS aggregated
    GROUP BY
        group_by_day
    ORDER BY
        group_by_day;
    """

    db_response = await prisma_client.db.query_raw(
        sql_query, start_date, end_date, team_id, customer_id
    )
    if db_response is None:
        return []

    return db_response
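# Usage sketch (illustrative; assumes `prisma_client` is the PrismaClient the
# proxy initialized at startup, and that query_raw accepts datetime params for
# the ::date casts):
#
#   import asyncio
#
#   rows = asyncio.run(
#       get_spend_by_team_and_customer(
#           start_date=dt(2024, 11, 1),
#           end_date=dt(2024, 11, 30),
#           team_id="team-abc",
#           customer_id="customer-1",
#           prisma_client=prisma_client,
#       )
#   )
#   # -> one row per day: {"group_by_day": ..., "teams_customers": [...]}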