diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41bff6d84b..cc41d85f14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,15 @@ repos: -- repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.4.8 +- repo: https://github.com/psf/black + rev: 24.2.0 hooks: - # Run the linter. - - id: ruff - exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/proxy/tests/ - # Run the formatter. - - id: ruff-format - exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/proxy/tests/ + - id: black +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 # The version of flake8 to use + hooks: + - id: flake8 + exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/proxy/tests/ + additional_dependencies: [flake8-print] + files: litellm/.*\.py - repo: local hooks: - id: check-files-match diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index aa67839141..5a5b6d14dd 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -165,7 +165,8 @@ class OpenTelemetry(CustomLogger): proxy_server_request = litellm_params.get("proxy_server_request", {}) or {} headers = proxy_server_request.get("headers", {}) or {} traceparent = headers.get("traceparent", None) - parent_otel_span = litellm_params.get("litellm_parent_otel_span", None) + _metadata = litellm_params.get("metadata", {}) + parent_otel_span = _metadata.get("litellm_parent_otel_span", None) """ Two way to use parents in opentelemetry diff --git a/litellm/main.py b/litellm/main.py index 2b1712d91b..5012ef1662 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -607,7 +607,6 @@ def completion( client = kwargs.get("client", None) ### Admin Controls ### no_log = kwargs.get("no-log", False) - litellm_parent_otel_span = kwargs.get("litellm_parent_otel_span", None) ######## end of unpacking kwargs ########### openai_params = [ "functions", @@ -697,7 +696,6 @@ def completion( "allowed_model_region", "model_config", "fastest_response", - "litellm_parent_otel_span", ] default_params = openai_params + litellm_params @@ -882,7 +880,6 @@ def completion( input_cost_per_token=input_cost_per_token, output_cost_per_second=output_cost_per_second, output_cost_per_token=output_cost_per_token, - litellm_parent_otel_span=litellm_parent_otel_span, ) logging.update_environment_variables( model=model, diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py new file mode 100644 index 0000000000..945799b4cf --- /dev/null +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -0,0 +1,130 @@ +import copy +from fastapi import Request +from typing import Any, Dict, Optional, TYPE_CHECKING +from litellm.proxy._types import UserAPIKeyAuth +from litellm._logging import verbose_proxy_logger, verbose_logger + +if TYPE_CHECKING: + from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig + + ProxyConfig = _ProxyConfig +else: + ProxyConfig = Any + + +def parse_cache_control(cache_control): + cache_dict = {} + directives = cache_control.split(", ") + + for directive in directives: + if "=" in directive: + key, value = directive.split("=") + cache_dict[key] = value + else: + cache_dict[directive] = True + + return cache_dict + + +async def add_litellm_data_to_request( + data: dict, + request: Request, + user_api_key_dict: UserAPIKeyAuth, + proxy_config: ProxyConfig, + general_settings: Optional[Dict[str, Any]] = None, + version: Optional[str] = None, +): + """ + Adds LiteLLM-specific data to the request. + + Args: + data (dict): The data dictionary to be modified. + request (Request): The incoming request. + user_api_key_dict (UserAPIKeyAuth): The user API key dictionary. + general_settings (Optional[Dict[str, Any]], optional): General settings. Defaults to None. + version (Optional[str], optional): Version. Defaults to None. + + Returns: + dict: The modified data dictionary. + + """ + query_params = dict(request.query_params) + if "api-version" in query_params: + data["api_version"] = query_params["api-version"] + + # Include original request and headers in the data + data["proxy_server_request"] = { + "url": str(request.url), + "method": request.method, + "headers": dict(request.headers), + "body": copy.copy(data), # use copy instead of deepcopy + } + + ## Cache Controls + headers = request.headers + verbose_proxy_logger.debug("Request Headers: %s", headers) + cache_control_header = headers.get("Cache-Control", None) + if cache_control_header: + cache_dict = parse_cache_control(cache_control_header) + data["ttl"] = cache_dict.get("s-maxage") + + verbose_proxy_logger.debug("receiving data: %s", data) + # users can pass in 'user' param to /chat/completions. Don't override it + if data.get("user", None) is None and user_api_key_dict.user_id is not None: + # if users are using user_api_key_auth, set `user` in `data` + data["user"] = user_api_key_dict.user_id + + if "metadata" not in data: + data["metadata"] = {} + data["metadata"]["user_api_key"] = user_api_key_dict.api_key + data["metadata"]["user_api_key_alias"] = getattr( + user_api_key_dict, "key_alias", None + ) + data["metadata"]["user_api_end_user_max_budget"] = getattr( + user_api_key_dict, "end_user_max_budget", None + ) + data["metadata"]["litellm_api_version"] = version + + if general_settings is not None: + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) + + data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id + data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id + data["metadata"]["user_api_key_team_id"] = getattr( + user_api_key_dict, "team_id", None + ) + data["metadata"]["user_api_key_team_alias"] = getattr( + user_api_key_dict, "team_alias", None + ) + data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata + _headers = dict(request.headers) + _headers.pop( + "authorization", None + ) # do not store the original `sk-..` api key in the db + data["metadata"]["headers"] = _headers + data["metadata"]["endpoint"] = str(request.url) + # Add the OTEL Parent Trace before sending it LiteLLM + data["metadata"]["litellm_parent_otel_span"] = user_api_key_dict.parent_otel_span + + ### END-USER SPECIFIC PARAMS ### + if user_api_key_dict.allowed_model_region is not None: + data["allowed_model_region"] = user_api_key_dict.allowed_model_region + + ### TEAM-SPECIFIC PARAMS ### + if user_api_key_dict.team_id is not None: + team_config = await proxy_config.load_team_config( + team_id=user_api_key_dict.team_id + ) + if len(team_config) == 0: + pass + else: + team_id = team_config.pop("team_id", None) + data["metadata"]["team_id"] = team_id + data = { + **team_config, + **data, + } # add the team-specific configs to the completion call + + return data diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 2071ef63bc..564f886f19 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -89,6 +89,7 @@ import litellm from litellm.types.llms.openai import ( HttpxBinaryResponseContent, ) +from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request from litellm.proxy.utils import ( PrismaClient, DBClient, @@ -3867,20 +3868,6 @@ def get_litellm_model_info(model: dict = {}): return {} -def parse_cache_control(cache_control): - cache_dict = {} - directives = cache_control.split(", ") - - for directive in directives: - if "=" in directive: - key, value = directive.split("=") - cache_dict[key] = value - else: - cache_dict[directive] = True - - return cache_dict - - def on_backoff(details): # The 'tries' key in the details dictionary contains the number of completed tries verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"]) @@ -4202,28 +4189,15 @@ async def chat_completion( except: data = json.loads(body_str) - # Azure OpenAI only: check if user passed api-version - query_params = dict(request.query_params) - if "api-version" in query_params: - data["api_version"] = query_params["api-version"] + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) - # Include original request and headers in the data - data["proxy_server_request"] = { - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - ## Cache Controls - headers = request.headers - verbose_proxy_logger.debug("Request Headers: %s", headers) - cache_control_header = headers.get("Cache-Control", None) - if cache_control_header: - cache_dict = parse_cache_control(cache_control_header) - data["ttl"] = cache_dict.get("s-maxage") - - verbose_proxy_logger.debug("receiving data: %s", data) data["model"] = ( general_settings.get("completion_model", None) # server default or user_model # model name passed via cli args @@ -4231,65 +4205,6 @@ async def chat_completion( or data["model"] # default passed in http request ) - # users can pass in 'user' param to /chat/completions. Don't override it - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - # if users are using user_api_key_auth, set `user` in `data` - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["user_api_end_user_max_budget"] = getattr( - user_api_key_dict, "end_user_max_budget", None - ) - data["metadata"]["litellm_api_version"] = version - - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["endpoint"] = str(request.url) - # Add the OTEL Parent Trace before sending it LiteLLM - data["litellm_parent_otel_span"] = user_api_key_dict.parent_otel_span - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - _is_valid_team_configs( - team_id=team_id, team_config=team_config, request_data=data - ) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call - - ### END-USER SPECIFIC PARAMS ### - if user_api_key_dict.allowed_model_region is not None: - data["allowed_model_region"] = user_api_key_dict.allowed_model_region - global user_temperature, user_request_timeout, user_max_tokens, user_api_base # override with user settings, these are params passed via cli if user_temperature: @@ -4548,7 +4463,6 @@ async def completion( except: data = json.loads(body_str) - data["user"] = data.get("user", user_api_key_dict.user_id) data["model"] = ( general_settings.get("completion_model", None) # server default or user_model # model name passed via cli args @@ -4557,30 +4471,15 @@ async def completion( ) if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["endpoint"] = str(request.url) # override with user settings, these are params passed via cli if user_temperature: @@ -4777,15 +4676,14 @@ async def embeddings( data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) data["model"] = ( general_settings.get("embedding_model", None) # server default @@ -4795,45 +4693,6 @@ async def embeddings( ) if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call ### MODEL ALIAS MAPPING ### # check if model name in model alias map @@ -4993,15 +4852,14 @@ async def image_generation( data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) data["model"] = ( general_settings.get("image_generation_model", None) # server default @@ -5011,46 +4869,6 @@ async def image_generation( if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call - ### MODEL ALIAS MAPPING ### # check if model name in model alias map # get the actual model name @@ -5180,12 +4998,14 @@ async def audio_speech( data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) if data.get("user", None) is None and user_api_key_dict.user_id is not None: data["user"] = user_api_key_dict.user_id @@ -5193,46 +5013,6 @@ async def audio_speech( if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call - router_model_names = llm_router.model_names if llm_router is not None else [] ### CALL HOOKS ### - modify incoming data / reject request before calling the model @@ -5350,12 +5130,14 @@ async def audio_transcriptions( data = {key: value for key, value in form_data.items() if key != "file"} # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) if data.get("user", None) is None and user_api_key_dict.user_id is not None: data["user"] = user_api_key_dict.user_id @@ -5368,47 +5150,6 @@ async def audio_transcriptions( if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - data["metadata"]["file_name"] = file.filename - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call - router_model_names = llm_router.model_names if llm_router is not None else [] assert ( @@ -5564,55 +5305,14 @@ async def get_assistants( body = await request.body() # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - data["metadata"]["litellm_api_version"] = version - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -5697,55 +5397,14 @@ async def create_threads( body = await request.body() # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "litellm_metadata" not in data: - data["litellm_metadata"] = {} - data["litellm_metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["litellm_metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["litellm_metadata"]["headers"] = _headers - data["litellm_metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["litellm_metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["litellm_metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["litellm_metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["litellm_metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["litellm_metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["litellm_metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -5829,55 +5488,14 @@ async def get_thread( try: # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -5964,55 +5582,14 @@ async def add_messages( data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "litellm_metadata" not in data: - data["litellm_metadata"] = {} - data["litellm_metadata"]["user_api_key"] = user_api_key_dict.api_key - data["litellm_metadata"]["litellm_api_version"] = version - data["litellm_metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["litellm_metadata"]["headers"] = _headers - data["litellm_metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["litellm_metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["litellm_metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["litellm_metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["litellm_metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["litellm_metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["litellm_metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -6095,55 +5672,14 @@ async def get_messages( data: Dict = {} try: # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -6228,55 +5764,14 @@ async def run_thread( body = await request.body() data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "litellm_metadata" not in data: - data["litellm_metadata"] = {} - data["litellm_metadata"]["user_api_key"] = user_api_key_dict.api_key - data["litellm_metadata"]["litellm_api_version"] = version - data["litellm_metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["litellm_metadata"]["headers"] = _headers - data["litellm_metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["litellm_metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["litellm_metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["litellm_metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["litellm_metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["litellm_metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["litellm_metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch if llm_router is None: @@ -6392,55 +5887,14 @@ async def create_batch( data = {key: value for key, value in form_data.items() if key != "file"} # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call _create_batch_data = CreateBatchRequest(**data) @@ -6537,55 +5991,14 @@ async def retrieve_batch( data = {key: value for key, value in form_data.items() if key != "file"} # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call _retrieve_batch_request = RetrieveBatchRequest( batch_id=batch_id, @@ -6697,55 +6110,14 @@ async def create_file( data = {key: value for key, value in form_data.items() if key != "file"} # Include original request and headers in the data - data["proxy_server_request"] = { # type: ignore - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id - - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call _create_file_request = CreateFileRequest() @@ -6839,15 +6211,14 @@ async def moderations( data = orjson.loads(body) # Include original request and headers in the data - data["proxy_server_request"] = { - "url": str(request.url), - "method": request.method, - "headers": dict(request.headers), - "body": copy.copy(data), # use copy instead of deepcopy - } - - if data.get("user", None) is None and user_api_key_dict.user_id is not None: - data["user"] = user_api_key_dict.user_id + data = await add_litellm_data_to_request( + data=data, + request=request, + general_settings=general_settings, + user_api_key_dict=user_api_key_dict, + version=version, + proxy_config=proxy_config, + ) data["model"] = ( general_settings.get("moderation_model", None) # server default @@ -6857,46 +6228,6 @@ async def moderations( if user_model: data["model"] = user_model - if "metadata" not in data: - data["metadata"] = {} - data["metadata"]["user_api_key"] = user_api_key_dict.api_key - data["metadata"]["litellm_api_version"] = version - data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata - _headers = dict(request.headers) - _headers.pop( - "authorization", None - ) # do not store the original `sk-..` api key in the db - data["metadata"]["headers"] = _headers - data["metadata"]["global_max_parallel_requests"] = general_settings.get( - "global_max_parallel_requests", None - ) - data["metadata"]["user_api_key_alias"] = getattr( - user_api_key_dict, "key_alias", None - ) - data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id - data["metadata"]["user_api_key_team_id"] = getattr( - user_api_key_dict, "team_id", None - ) - data["metadata"]["user_api_key_team_alias"] = getattr( - user_api_key_dict, "team_alias", None - ) - data["metadata"]["endpoint"] = str(request.url) - - ### TEAM-SPECIFIC PARAMS ### - if user_api_key_dict.team_id is not None: - team_config = await proxy_config.load_team_config( - team_id=user_api_key_dict.team_id - ) - if len(team_config) == 0: - pass - else: - team_id = team_config.pop("team_id", None) - data["metadata"]["team_id"] = team_id - data = { - **team_config, - **data, - } # add the team-specific configs to the completion call - router_model_names = llm_router.model_names if llm_router is not None else [] ### CALL HOOKS ### - modify incoming data / reject request before calling the model diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 0b5dcd85ba..afe059ce1e 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -73,7 +73,8 @@ def print_verbose(print_statement): def safe_deep_copy(data): if isinstance(data, dict): # remove litellm_parent_otel_span since this is not picklable - data.pop("litellm_parent_otel_span", None) + if "metadata" in data and "litellm_parent_otel_span" in data["metadata"]: + data["metadata"].pop("litellm_parent_otel_span") new_data = copy.deepcopy(data) return new_data diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 2c643eff0b..a37f8adbd1 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -152,7 +152,6 @@ def test_chat_completion(mock_acompletion, client_no_auth): specific_deployment=True, metadata=mock.ANY, proxy_server_request=mock.ANY, - litellm_parent_otel_span=mock.ANY, ) print(f"response - {response.text}") assert response.status_code == 200 diff --git a/litellm/utils.py b/litellm/utils.py index ad9779f2d7..ae5879f07f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4927,7 +4927,6 @@ def get_litellm_params( input_cost_per_token=None, output_cost_per_token=None, output_cost_per_second=None, - litellm_parent_otel_span=None, ): litellm_params = { "acompletion": acompletion, @@ -4950,7 +4949,6 @@ def get_litellm_params( "input_cost_per_second": input_cost_per_second, "output_cost_per_token": output_cost_per_token, "output_cost_per_second": output_cost_per_second, - "litellm_parent_otel_span": litellm_parent_otel_span, } return litellm_params