From 6575143460e3dcebc40ff1d5e4006ef0780841b5 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 8 May 2024 16:00:08 -0700 Subject: [PATCH] feat(proxy_server.py): return litellm version in response headers --- .pre-commit-config.yaml | 16 ++--- enterprise/utils.py | 2 +- litellm/_redis.py | 4 +- litellm/budget_manager.py | 2 +- litellm/integrations/aispend.py | 1 - litellm/integrations/athina.py | 57 ++++++++++++---- litellm/integrations/berrispend.py | 2 +- litellm/integrations/clickhouse.py | 1 - litellm/integrations/custom_logger.py | 1 - litellm/integrations/datadog.py | 2 +- litellm/integrations/dynamodb.py | 2 +- litellm/integrations/greenscale.py | 45 +++++++++---- litellm/integrations/helicone.py | 2 +- litellm/integrations/langsmith.py | 10 +-- litellm/integrations/openmeter.py | 3 +- litellm/integrations/prometheus.py | 3 +- litellm/integrations/prometheus_services.py | 3 +- litellm/integrations/prompt_layer.py | 9 ++- litellm/integrations/s3.py | 1 - litellm/integrations/supabase.py | 2 +- litellm/llms/ai21.py | 4 +- litellm/llms/aleph_alpha.py | 4 +- litellm/llms/anthropic.py | 4 +- litellm/llms/azure.py | 4 +- litellm/llms/azure_text.py | 2 +- litellm/llms/baseten.py | 2 +- litellm/llms/bedrock.py | 13 ++-- litellm/llms/cloudflare.py | 4 +- litellm/llms/cohere.py | 4 +- litellm/llms/cohere_chat.py | 4 +- litellm/llms/maritalk.py | 2 +- litellm/llms/nlp_cloud.py | 2 +- litellm/llms/ollama.py | 44 +++++++++---- litellm/llms/oobabooga.py | 2 +- litellm/llms/openai.py | 1 - litellm/llms/petals.py | 2 +- litellm/llms/replicate.py | 4 +- litellm/llms/sagemaker.py | 10 +-- litellm/llms/together_ai.py | 4 +- litellm/llms/vertex_ai.py | 4 +- litellm/llms/vertex_ai_anthropic.py | 4 +- litellm/llms/vllm.py | 4 +- litellm/llms/watsonx.py | 4 +- litellm/proxy/proxy_cli.py | 4 +- litellm/proxy/proxy_server.py | 72 ++++++++++++++++++--- litellm/proxy/utils.py | 4 +- litellm/router_strategy/least_busy.py | 2 +- litellm/router_strategy/lowest_cost.py | 2 +- litellm/router_strategy/lowest_latency.py | 4 +- litellm/utils.py | 12 ++-- 50 files changed, 260 insertions(+), 140 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc41d85f1..e8bb1ff66 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,11 +16,11 @@ repos: name: Check if files match entry: python3 ci_cd/check_files_match.py language: system -- repo: local - hooks: - - id: mypy - name: mypy - entry: python3 -m mypy --ignore-missing-imports - language: system - types: [python] - files: ^litellm/ \ No newline at end of file +# - repo: local +# hooks: +# - id: mypy +# name: mypy +# entry: python3 -m mypy --ignore-missing-imports +# language: system +# types: [python] +# files: ^litellm/ \ No newline at end of file diff --git a/enterprise/utils.py b/enterprise/utils.py index 4a42dc996..05bd7dac6 100644 --- a/enterprise/utils.py +++ b/enterprise/utils.py @@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]): def _forecast_daily_cost(data: list): - import requests + import requests # type: ignore from datetime import datetime, timedelta if len(data) == 0: diff --git a/litellm/_redis.py b/litellm/_redis.py index d7789472c..d72016dcd 100644 --- a/litellm/_redis.py +++ b/litellm/_redis.py @@ -10,8 +10,8 @@ # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation import os import inspect -import redis, litellm -import redis.asyncio as async_redis +import redis, litellm # type: ignore +import redis.asyncio 
as async_redis # type: ignore from typing import List, Optional diff --git a/litellm/budget_manager.py b/litellm/budget_manager.py index 841015753..9ef4bfafa 100644 --- a/litellm/budget_manager.py +++ b/litellm/budget_manager.py @@ -10,7 +10,7 @@ import os, json, time import litellm from litellm.utils import ModelResponse -import requests, threading +import requests, threading # type: ignore from typing import Optional, Union, Literal diff --git a/litellm/integrations/aispend.py b/litellm/integrations/aispend.py index 2015d45dd..a893f8923 100644 --- a/litellm/integrations/aispend.py +++ b/litellm/integrations/aispend.py @@ -1,7 +1,6 @@ #### What this does #### # On success + failure, log events to aispend.io import dotenv, os -import requests dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/athina.py b/litellm/integrations/athina.py index 897cf6c8d..660dd51ef 100644 --- a/litellm/integrations/athina.py +++ b/litellm/integrations/athina.py @@ -4,18 +4,30 @@ import datetime class AthinaLogger: def __init__(self): import os + self.athina_api_key = os.getenv("ATHINA_API_KEY") self.headers = { "athina-api-key": self.athina_api_key, - "Content-Type": "application/json" + "Content-Type": "application/json", } self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference" - self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"] + self.additional_keys = [ + "environment", + "prompt_slug", + "customer_id", + "customer_user_id", + "session_id", + "external_reference_id", + "context", + "expected_response", + "user_query", + ] def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose): - import requests + import requests # type: ignore import json import traceback + try: response_json = response_obj.model_dump() if response_obj else {} data = { @@ -23,32 +35,51 @@ class AthinaLogger: "request": kwargs, "response": response_json, "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"), - "completion_tokens": response_json.get("usage", {}).get("completion_tokens"), + "completion_tokens": response_json.get("usage", {}).get( + "completion_tokens" + ), "total_tokens": response_json.get("usage", {}).get("total_tokens"), } - - if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime: - data["response_time"] = int((end_time - start_time).total_seconds() * 1000) + + if ( + type(end_time) == datetime.datetime + and type(start_time) == datetime.datetime + ): + data["response_time"] = int( + (end_time - start_time).total_seconds() * 1000 + ) if "messages" in kwargs: data["prompt"] = kwargs.get("messages", None) # Directly add tools or functions if present optional_params = kwargs.get("optional_params", {}) - data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"]) + data.update( + (k, v) + for k, v in optional_params.items() + if k in ["tools", "functions"] + ) # Add additional metadata keys - metadata = kwargs.get("litellm_params", {}).get("metadata", {}) + metadata = kwargs.get("litellm_params", {}).get("metadata", {}) if metadata: for key in self.additional_keys: if key in metadata: data[key] = metadata[key] - response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str)) + response = requests.post( + self.athina_logging_url, + headers=self.headers, + data=json.dumps(data, default=str), + ) if 
response.status_code != 200: - print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}") + print_verbose( + f"Athina Logger Error - {response.text}, {response.status_code}" + ) else: print_verbose(f"Athina Logger Succeeded - {response.text}") except Exception as e: - print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}") - pass \ No newline at end of file + print_verbose( + f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}" + ) + pass diff --git a/litellm/integrations/berrispend.py b/litellm/integrations/berrispend.py index 7d91ffca7..1f0ae4581 100644 --- a/litellm/integrations/berrispend.py +++ b/litellm/integrations/berrispend.py @@ -1,7 +1,7 @@ #### What this does #### # On success + failure, log events to aispend.io import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index d5000e5c4..7d1fb37d9 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -3,7 +3,6 @@ #### What this does #### # On success, logs events to Promptlayer import dotenv, os -import requests from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index b288036ad..8a3e0f467 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -1,7 +1,6 @@ #### What this does #### # On success, logs events to Promptlayer import dotenv, os -import requests from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index f5db5bf1f..d969341fc 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -2,7 +2,7 @@ # On success + failure, log events to Supabase import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/dynamodb.py b/litellm/integrations/dynamodb.py index 2ed6c3f9f..b5462ee7f 100644 --- a/litellm/integrations/dynamodb.py +++ b/litellm/integrations/dynamodb.py @@ -2,7 +2,7 @@ # On success + failure, log events to Supabase import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/greenscale.py b/litellm/integrations/greenscale.py index 3ff808ddb..78190d69d 100644 --- a/litellm/integrations/greenscale.py +++ b/litellm/integrations/greenscale.py @@ -1,15 +1,17 @@ -import requests +import requests # type: ignore import json import traceback from datetime import datetime, timezone + class GreenscaleLogger: def __init__(self): import os + self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY") self.headers = { "api-key": self.greenscale_api_key, - "Content-Type": "application/json" + "Content-Type": "application/json", } self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT") @@ -19,33 +21,48 @@ class GreenscaleLogger: data = { "modelId": kwargs.get("model"), "inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"), - "outputTokenCount": response_json.get("usage", {}).get("completion_tokens"), + "outputTokenCount": response_json.get("usage", {}).get( + "completion_tokens" + ), } - data["timestamp"] = 
datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') - - if type(end_time) == datetime and type(start_time) == datetime: - data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000) + data["timestamp"] = datetime.now(timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + if type(end_time) == datetime and type(start_time) == datetime: + data["invocationLatency"] = int( + (end_time - start_time).total_seconds() * 1000 + ) # Add additional metadata keys to tags tags = [] metadata = kwargs.get("litellm_params", {}).get("metadata", {}) for key, value in metadata.items(): - if key.startswith("greenscale"): + if key.startswith("greenscale"): if key == "greenscale_project": data["project"] = value elif key == "greenscale_application": data["application"] = value else: - tags.append({"key": key.replace("greenscale_", ""), "value": str(value)}) - + tags.append( + {"key": key.replace("greenscale_", ""), "value": str(value)} + ) + data["tags"] = tags - response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str)) + response = requests.post( + self.greenscale_logging_url, + headers=self.headers, + data=json.dumps(data, default=str), + ) if response.status_code != 200: - print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}") + print_verbose( + f"Greenscale Logger Error - {response.text}, {response.status_code}" + ) else: print_verbose(f"Greenscale Logger Succeeded - {response.text}") except Exception as e: - print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}") - pass \ No newline at end of file + print_verbose( + f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}" + ) + pass diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index cb8663773..c8c107541 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -1,7 +1,7 @@ #### What this does #### # On success, logs events to Helicone import dotenv, os -import requests +import requests # type: ignore import litellm dotenv.load_dotenv() # Loading env variables using dotenv diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py index 415f3d2d2..8a0fb3852 100644 --- a/litellm/integrations/langsmith.py +++ b/litellm/integrations/langsmith.py @@ -1,15 +1,14 @@ #### What this does #### # On success, logs events to Langsmith -import dotenv, os -import requests -import requests +import dotenv, os # type: ignore +import requests # type: ignore from datetime import datetime dotenv.load_dotenv() # Loading env variables using dotenv import traceback import asyncio import types -from pydantic import BaseModel +from pydantic import BaseModel # type: ignore def is_serializable(value): @@ -79,8 +78,6 @@ class LangsmithLogger: except: response_obj = response_obj.dict() # type: ignore - print(f"response_obj: {response_obj}") - data = { "name": run_name, "run_type": "llm", # this should always be llm, since litellm always logs llm calls. 
Langsmith allow us to log "chain" @@ -90,7 +87,6 @@ class LangsmithLogger: "start_time": start_time, "end_time": end_time, } - print(f"data: {data}") response = requests.post( "https://api.smith.langchain.com/runs", diff --git a/litellm/integrations/openmeter.py b/litellm/integrations/openmeter.py index 237a40eb8..a454739d5 100644 --- a/litellm/integrations/openmeter.py +++ b/litellm/integrations/openmeter.py @@ -2,7 +2,6 @@ ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268 import dotenv, os, json -import requests import litellm dotenv.load_dotenv() # Loading env variables using dotenv @@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger): "total_tokens": response_obj["usage"].get("total_tokens"), } - subject = kwargs.get("user", None), # end-user passed in via 'user' param + subject = (kwargs.get("user", None),) # end-user passed in via 'user' param if not subject: raise Exception("OpenMeter: user is required") diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index e3c6e8e77..577946ce1 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -3,7 +3,7 @@ # On success, log events to Prometheus import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback @@ -19,7 +19,6 @@ class PrometheusLogger: **kwargs, ): try: - print(f"in init prometheus metrics") from prometheus_client import Counter self.litellm_llm_api_failed_requests_metric = Counter( diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index 0249a71d0..d276bb85b 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -4,7 +4,7 @@ import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback @@ -183,7 +183,6 @@ class PrometheusServicesLogger: ) async def async_service_failure_hook(self, payload: ServiceLoggerPayload): - print(f"received error payload: {payload.error}") if self.mock_testing: self.mock_testing_failure_calls += 1 diff --git a/litellm/integrations/prompt_layer.py b/litellm/integrations/prompt_layer.py index 39a80940b..ce610e1ef 100644 --- a/litellm/integrations/prompt_layer.py +++ b/litellm/integrations/prompt_layer.py @@ -1,12 +1,13 @@ #### What this does #### # On success, logs events to Promptlayer import dotenv, os -import requests +import requests # type: ignore from pydantic import BaseModel dotenv.load_dotenv() # Loading env variables using dotenv import traceback + class PromptLayerLogger: # Class variables or attributes def __init__(self): @@ -32,7 +33,11 @@ class PromptLayerLogger: tags = kwargs["litellm_params"]["metadata"]["pl_tags"] # Remove "pl_tags" from metadata - metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"} + metadata = { + k: v + for k, v in kwargs["litellm_params"]["metadata"].items() + if k != "pl_tags" + } print_verbose( f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}" diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index dc35430bc..d31b15840 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -2,7 +2,6 @@ # On success + failure, log events to Supabase import dotenv, os -import requests dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git 
a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py index a99e4abc4..58beba8a3 100644 --- a/litellm/integrations/supabase.py +++ b/litellm/integrations/supabase.py @@ -2,7 +2,7 @@ # On success + failure, log events to Supabase import dotenv, os -import requests +import requests # type: ignore dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py index 73d5afebe..a39a83f15 100644 --- a/litellm/llms/ai21.py +++ b/litellm/llms/ai21.py @@ -1,8 +1,8 @@ import os, types, traceback import json from enum import Enum -import requests -import time, httpx +import requests # type: ignore +import time, httpx # type: ignore from typing import Callable, Optional from litellm.utils import ModelResponse, Choices, Message import litellm diff --git a/litellm/llms/aleph_alpha.py b/litellm/llms/aleph_alpha.py index 86a30a9ec..7edd11964 100644 --- a/litellm/llms/aleph_alpha.py +++ b/litellm/llms/aleph_alpha.py @@ -1,12 +1,12 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional import litellm from litellm.utils import ModelResponse, Choices, Message, Usage -import httpx +import httpx # type: ignore class AlephAlphaError(Exception): diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 3fc374dce..818c4ecb3 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -1,7 +1,7 @@ import os, types import json from enum import Enum -import requests, copy +import requests, copy # type: ignore import time from typing import Callable, Optional, List from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper @@ -9,7 +9,7 @@ import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from .base import BaseLLM -import httpx +import httpx # type: ignore class AnthropicConstants(Enum): diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py index e7af9d43b..4fed81bf6 100644 --- a/litellm/llms/azure.py +++ b/litellm/llms/azure.py @@ -1,5 +1,5 @@ from typing import Optional, Union, Any -import types, requests +import types, requests # type: ignore from .base import BaseLLM from litellm.utils import ( ModelResponse, @@ -12,7 +12,7 @@ from litellm.utils import ( from typing import Callable, Optional, BinaryIO from litellm import OpenAIConfig import litellm, json -import httpx +import httpx # type: ignore from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport from openai import AzureOpenAI, AsyncAzureOpenAI import uuid diff --git a/litellm/llms/azure_text.py b/litellm/llms/azure_text.py index e0d547477..640ab8222 100644 --- a/litellm/llms/azure_text.py +++ b/litellm/llms/azure_text.py @@ -1,5 +1,5 @@ from typing import Optional, Union, Any -import types, requests +import types, requests # type: ignore from .base import BaseLLM from litellm.utils import ( ModelResponse, diff --git a/litellm/llms/baseten.py b/litellm/llms/baseten.py index 75db9ab46..643dae530 100644 --- a/litellm/llms/baseten.py +++ b/litellm/llms/baseten.py @@ -1,7 +1,7 @@ import os import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable from litellm.utils import ModelResponse, Usage diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 2f26ae4a9..08433ba18 100644 --- a/litellm/llms/bedrock.py +++ 
b/litellm/llms/bedrock.py @@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config: "stop", "temperature", "top_p", - "extra_headers" + "extra_headers", ] - def map_openai_params(self, non_default_params: dict, optional_params: dict): for param, value in non_default_params.items(): if param == "max_tokens": @@ -534,10 +533,12 @@ class AmazonStabilityConfig: def add_custom_header(headers): """Closure to capture the headers and add them.""" + def callback(request, **kwargs): """Actual callback function that Boto3 will call.""" for header_name, header_value in headers.items(): request.headers.add_header(header_name, header_value) + return callback @@ -672,7 +673,9 @@ def init_bedrock_client( config=config, ) if extra_headers: - client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers)) + client.meta.events.register( + "before-sign.bedrock-runtime.*", add_custom_header(extra_headers) + ) return client @@ -1224,7 +1227,7 @@ def _embedding_func_single( "input_type", "search_document" ) # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3 data = {"texts": [input], **inference_params} # type: ignore - body = json.dumps(data).encode("utf-8") + body = json.dumps(data).encode("utf-8") # type: ignore ## LOGGING request_str = f""" response = client.invoke_model( @@ -1416,7 +1419,7 @@ def image_generation( ## LOGGING request_str = f""" response = client.invoke_model( - body={body}, + body={body}, # type: ignore modelId={modelId}, accept="application/json", contentType="application/json", diff --git a/litellm/llms/cloudflare.py b/litellm/llms/cloudflare.py index b8187cbc9..5a24b3b44 100644 --- a/litellm/llms/cloudflare.py +++ b/litellm/llms/cloudflare.py @@ -1,11 +1,11 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional import litellm -import httpx +import httpx # type: ignore from litellm.utils import ModelResponse, Usage from .prompt_templates.factory import prompt_factory, custom_prompt diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py index b867559c3..0ebdf38f1 100644 --- a/litellm/llms/cohere.py +++ b/litellm/llms/cohere.py @@ -1,12 +1,12 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time, traceback from typing import Callable, Optional from litellm.utils import ModelResponse, Choices, Message, Usage import litellm -import httpx +import httpx # type: ignore class CohereError(Exception): diff --git a/litellm/llms/cohere_chat.py b/litellm/llms/cohere_chat.py index 2a9bc320b..e4de6ddcb 100644 --- a/litellm/llms/cohere_chat.py +++ b/litellm/llms/cohere_chat.py @@ -1,12 +1,12 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time, traceback from typing import Callable, Optional from litellm.utils import ModelResponse, Choices, Message, Usage import litellm -import httpx +import httpx # type: ignore from .prompt_templates.factory import cohere_message_pt diff --git a/litellm/llms/maritalk.py b/litellm/llms/maritalk.py index 4c6b86d3c..dfe53e9df 100644 --- a/litellm/llms/maritalk.py +++ b/litellm/llms/maritalk.py @@ -1,7 +1,7 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time, traceback from typing import Callable, Optional, List from litellm.utils import ModelResponse, Choices, Message, 
Usage diff --git a/litellm/llms/nlp_cloud.py b/litellm/llms/nlp_cloud.py index 86648118f..cd5f17a90 100644 --- a/litellm/llms/nlp_cloud.py +++ b/litellm/llms/nlp_cloud.py @@ -1,7 +1,7 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional import litellm diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index 5180cfebe..9c9b5e898 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -1,10 +1,10 @@ from itertools import chain -import requests, types, time +import requests, types, time # type: ignore import json, uuid import traceback from typing import Optional import litellm -import httpx, aiohttp, asyncio +import httpx, aiohttp, asyncio # type: ignore from .prompt_templates.factory import prompt_factory, custom_prompt @@ -220,7 +220,10 @@ def get_ollama_response( tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -232,7 +235,9 @@ def get_ollama_response( model_response["created"] = int(time.time()) model_response["model"] = "ollama/" + model prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=()))) # type: ignore - completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", ""))) + completion_tokens = response_json.get( + "eval_count", len(response_json.get("message", dict()).get("content", "")) + ) model_response["usage"] = litellm.Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -273,7 +278,10 @@ def ollama_completion_stream(url, data, logging_obj): tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -314,9 +322,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob first_chunk_content = first_chunk.choices[0].delta.content or "" response_content = first_chunk_content + "".join( [ - chunk.choices[0].delta.content - async for chunk in streamwrapper - if chunk.choices[0].delta.content] + chunk.choices[0].delta.content + async for chunk in streamwrapper + if chunk.choices[0].delta.content + ] ) function_call = json.loads(response_content) delta = litellm.utils.Delta( @@ -324,7 +333,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -373,7 +385,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -387,7 +402,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): model_response["created"] = 
int(time.time()) model_response["model"] = "ollama/" + data["model"] prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=()))) # type: ignore - completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", ""))) + completion_tokens = response_json.get( + "eval_count", + len(response_json.get("message", dict()).get("content", "")), + ) model_response["usage"] = litellm.Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -475,6 +493,7 @@ async def ollama_aembeddings( } return model_response + def ollama_embeddings( api_base: str, model: str, @@ -492,5 +511,6 @@ def ollama_embeddings( optional_params, logging_obj, model_response, - encoding) + encoding, ) + ) diff --git a/litellm/llms/oobabooga.py b/litellm/llms/oobabooga.py index b166c9069..f8f32e0fe 100644 --- a/litellm/llms/oobabooga.py +++ b/litellm/llms/oobabooga.py @@ -1,7 +1,7 @@ import os import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional from litellm.utils import ModelResponse, Usage diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index f007507c9..d516334ac 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -22,7 +22,6 @@ from litellm.utils import ( TextCompletionResponse, ) from typing import Callable, Optional -import aiohttp, requests import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from openai import OpenAI, AsyncOpenAI diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py index 25403f598..334b80d38 100644 --- a/litellm/llms/petals.py +++ b/litellm/llms/petals.py @@ -1,7 +1,7 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional import litellm diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py index 65052e317..c29728134 100644 --- a/litellm/llms/replicate.py +++ b/litellm/llms/replicate.py @@ -1,11 +1,11 @@ import os, types import json -import requests +import requests # type: ignore import time from typing import Callable, Optional from litellm.utils import ModelResponse, Usage import litellm -import httpx +import httpx # type: ignore from .prompt_templates.factory import prompt_factory, custom_prompt diff --git a/litellm/llms/sagemaker.py b/litellm/llms/sagemaker.py index 27d3ff72a..8e75428bb 100644 --- a/litellm/llms/sagemaker.py +++ b/litellm/llms/sagemaker.py @@ -1,14 +1,14 @@ import os, types, traceback from enum import Enum import json -import requests +import requests # type: ignore import time from typing import Callable, Optional, Any import litellm from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage import sys from copy import deepcopy -import httpx +import httpx # type: ignore from .prompt_templates.factory import prompt_factory, custom_prompt @@ -295,7 +295,7 @@ def completion( EndpointName={model}, InferenceComponentName={model_id}, ContentType="application/json", - Body={data}, + Body={data}, # type: ignore CustomAttributes="accept_eula=true", ) """ # type: ignore @@ -321,7 +321,7 @@ def completion( response = client.invoke_endpoint( EndpointName={model}, ContentType="application/json", - Body={data}, + Body={data}, # type: ignore CustomAttributes="accept_eula=true", ) """ # type: ignore @@ -688,7 +688,7 @@ def embedding( response = client.invoke_endpoint( EndpointName={model}, 
ContentType="application/json", - Body={data}, + Body={data}, # type: ignore CustomAttributes="accept_eula=true", )""" # type: ignore logging_obj.pre_call( diff --git a/litellm/llms/together_ai.py b/litellm/llms/together_ai.py index 3f9d3b9de..47453ca88 100644 --- a/litellm/llms/together_ai.py +++ b/litellm/llms/together_ai.py @@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional import litellm -import httpx +import httpx # type: ignore from litellm.utils import ModelResponse, Usage from .prompt_templates.factory import prompt_factory, custom_prompt diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index ce0ccc73a..cab7ae19f 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -1,12 +1,12 @@ import os, types import json from enum import Enum -import requests +import requests # type: ignore import time from typing import Callable, Optional, Union, List from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason import litellm, uuid -import httpx, inspect +import httpx, inspect # type: ignore class VertexAIError(Exception): diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index e73545f99..3bdcf4fd6 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -3,7 +3,7 @@ import os, types import json from enum import Enum -import requests, copy +import requests, copy # type: ignore import time, uuid from typing import Callable, Optional, List from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper @@ -17,7 +17,7 @@ from .prompt_templates.factory import ( extract_between_tags, parse_xml_params, ) -import httpx +import httpx # type: ignore class VertexAIError(Exception): diff --git a/litellm/llms/vllm.py b/litellm/llms/vllm.py index 15f18cbdc..b2a9dd54d 100644 --- a/litellm/llms/vllm.py +++ b/litellm/llms/vllm.py @@ -1,8 +1,8 @@ import os import json from enum import Enum -import requests -import time, httpx +import requests # type: ignore +import time, httpx # type: ignore from typing import Callable, Any from litellm.utils import ModelResponse, Usage from .prompt_templates.factory import prompt_factory, custom_prompt diff --git a/litellm/llms/watsonx.py b/litellm/llms/watsonx.py index ac38a2a8f..99f2d18ba 100644 --- a/litellm/llms/watsonx.py +++ b/litellm/llms/watsonx.py @@ -3,8 +3,8 @@ import json, types, time # noqa: E401 from contextlib import contextmanager from typing import Callable, Dict, Optional, Any, Union, List -import httpx -import requests +import httpx # type: ignore +import requests # type: ignore import litellm from litellm.utils import ModelResponse, get_secret, Usage diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 41eff1eaf..0d0919e18 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -252,7 +252,7 @@ def run_server( if model and "ollama" in model and api_base is None: run_ollama_serve() if test_async is True: - import requests, concurrent, time + import requests, concurrent, time # type: ignore api_base = f"http://{host}:{port}" @@ -418,7 +418,7 @@ def run_server( read from there and save it to os.env['DATABASE_URL'] """ try: - import yaml, asyncio + import yaml, asyncio # type: ignore except: raise ImportError( "yaml needs to be imported. 
Run - `pip install 'litellm[proxy]'`" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index ce62a7609..53352a4f8 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -30,7 +30,7 @@ sys.path.insert( try: import fastapi import backoff - import yaml + import yaml # type: ignore import orjson import logging from apscheduler.schedulers.asyncio import AsyncIOScheduler @@ -3719,6 +3719,7 @@ async def chat_completion( "x-litellm-model-id": model_id, "x-litellm-cache-key": cache_key, "x-litellm-model-api-base": api_base, + "x-litellm-version": version, } selected_data_generator = select_data_generator( response=response, @@ -3734,6 +3735,7 @@ async def chat_completion( fastapi_response.headers["x-litellm-model-id"] = model_id fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version ### CALL HOOKS ### - modify outgoing data response = await proxy_logging_obj.post_call_success_hook( @@ -3890,14 +3892,10 @@ async def completion( }, ) - if hasattr(response, "_hidden_params"): - model_id = response._hidden_params.get("model_id", None) or "" - original_response = ( - response._hidden_params.get("original_response", None) or "" - ) - else: - model_id = "" - original_response = "" + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" verbose_proxy_logger.debug("final response: %s", response) if ( @@ -3905,6 +3903,9 @@ async def completion( ): # use generate_responses to stream responses custom_headers = { "x-litellm-model-id": model_id, + "x-litellm-cache-key": cache_key, + "x-litellm-model-api-base": api_base, + "x-litellm-version": version, } selected_data_generator = select_data_generator( response=response, @@ -3919,6 +3920,10 @@ async def completion( ) fastapi_response.headers["x-litellm-model-id"] = model_id + fastapi_response.headers["x-litellm-cache-key"] = cache_key + fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version + return response except Exception as e: data["litellm_status"] = "fail" # used for alerting @@ -3958,6 +3963,7 @@ async def completion( ) # azure compatible endpoint async def embeddings( request: Request, + fastapi_response: Response, model: Optional[str] = None, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): @@ -4104,6 +4110,17 @@ async def embeddings( ### ALERTING ### data["litellm_status"] = "success" # used for alerting + ### RESPONSE HEADERS ### + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" + + fastapi_response.headers["x-litellm-model-id"] = model_id + fastapi_response.headers["x-litellm-cache-key"] = cache_key + fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version + return response except Exception as e: data["litellm_status"] = "fail" # used for alerting @@ -4142,6 +4159,7 @@ async def embeddings( ) async def image_generation( request: Request, + fastapi_response: Response, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): global proxy_logging_obj @@ -4261,6 +4279,17 
@@ async def image_generation( ### ALERTING ### data["litellm_status"] = "success" # used for alerting + ### RESPONSE HEADERS ### + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" + + fastapi_response.headers["x-litellm-model-id"] = model_id + fastapi_response.headers["x-litellm-cache-key"] = cache_key + fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version + return response except Exception as e: data["litellm_status"] = "fail" # used for alerting @@ -4297,6 +4326,7 @@ async def image_generation( ) async def audio_transcriptions( request: Request, + fastapi_response: Response, file: UploadFile = File(...), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): @@ -4441,6 +4471,18 @@ async def audio_transcriptions( ### ALERTING ### data["litellm_status"] = "success" # used for alerting + + ### RESPONSE HEADERS ### + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" + + fastapi_response.headers["x-litellm-model-id"] = model_id + fastapi_response.headers["x-litellm-cache-key"] = cache_key + fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version + return response except Exception as e: data["litellm_status"] = "fail" # used for alerting @@ -4480,6 +4522,7 @@ async def audio_transcriptions( ) async def moderations( request: Request, + fastapi_response: Response, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): """ @@ -4604,6 +4647,17 @@ async def moderations( ### ALERTING ### data["litellm_status"] = "success" # used for alerting + ### RESPONSE HEADERS ### + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" + + fastapi_response.headers["x-litellm-model-id"] = model_id + fastapi_response.headers["x-litellm-cache-key"] = cache_key + fastapi_response.headers["x-litellm-model-api-base"] = api_base + fastapi_response.headers["x-litellm-version"] = version + return response except Exception as e: data["litellm_status"] = "fail" # used for alerting diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 0379d5152..e4fa73307 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any: module_file_path = os.path.join(directory, *module_name.split(".")) module_file_path += ".py" - spec = importlib.util.spec_from_file_location(module_name, module_file_path) + spec = importlib.util.spec_from_file_location(module_name, module_file_path) # type: ignore if spec is None: raise ImportError( f"Could not find a module specification for {module_file_path}" ) - module = importlib.util.module_from_spec(spec) + module = importlib.util.module_from_spec(spec) # type: ignore spec.loader.exec_module(module) # type: ignore else: # Dynamically import the module diff --git a/litellm/router_strategy/least_busy.py b/litellm/router_strategy/least_busy.py index 68874cec4..54d44b41d 100644 --- 
a/litellm/router_strategy/least_busy.py +++ b/litellm/router_strategy/least_busy.py @@ -6,7 +6,7 @@ # - use litellm.success + failure callbacks to log when a request completed # - in get_available_deployment, for a given model group name -> pick based on traffic -import dotenv, os, requests, random +import dotenv, os, requests, random # type: ignore from typing import Optional dotenv.load_dotenv() # Loading env variables using dotenv diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py index 2d010fb4f..279af2ae9 100644 --- a/litellm/router_strategy/lowest_cost.py +++ b/litellm/router_strategy/lowest_cost.py @@ -1,7 +1,7 @@ #### What this does #### # picks based on response time (for streaming, this is time to first token) from pydantic import BaseModel, Extra, Field, root_validator -import dotenv, os, requests, random +import dotenv, os, requests, random # type: ignore from typing import Optional, Union, List, Dict from datetime import datetime, timedelta import random diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index 5f0f15aac..afdfc1779 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -1,7 +1,7 @@ #### What this does #### # picks based on response time (for streaming, this is time to first token) -from pydantic import BaseModel, Extra, Field, root_validator -import dotenv, os, requests, random +from pydantic import BaseModel, Extra, Field, root_validator # type: ignore +import dotenv, os, requests, random # type: ignore from typing import Optional, Union, List, Dict from datetime import datetime, timedelta import random diff --git a/litellm/utils.py b/litellm/utils.py index 88e395233..8136b8777 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -14,7 +14,7 @@ import subprocess, os from os.path import abspath, join, dirname import litellm, openai import itertools -import random, uuid, requests +import random, uuid, requests # type: ignore from functools import wraps import datetime, time import tiktoken @@ -36,7 +36,7 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata try: # this works in python 3.8 - import pkg_resources + import pkg_resources # type: ignore filename = pkg_resources.resource_filename(__name__, "llms/tokenizers") # try: @@ -7732,11 +7732,11 @@ def _calculate_retry_after( try: retry_after = int(retry_header) except Exception: - retry_date_tuple = email.utils.parsedate_tz(retry_header) + retry_date_tuple = email.utils.parsedate_tz(retry_header) # type: ignore if retry_date_tuple is None: retry_after = -1 else: - retry_date = email.utils.mktime_tz(retry_date_tuple) + retry_date = email.utils.mktime_tz(retry_date_tuple) # type: ignore retry_after = int(retry_date - time.time()) else: retry_after = -1 @@ -9423,7 +9423,9 @@ def get_secret( else: secret = os.environ.get(secret_name) try: - secret_value_as_bool = ast.literal_eval(secret) if secret is not None else None + secret_value_as_bool = ( + ast.literal_eval(secret) if secret is not None else None + ) if isinstance(secret_value_as_bool, bool): return secret_value_as_bool else:
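
Note (not part of the patch): once this change is applied, the proxy attaches x-litellm-version, alongside the existing x-litellm-model-id, x-litellm-cache-key, and x-litellm-model-api-base headers, to responses from the chat completion, completion, embeddings, image generation, audio transcription, and moderation endpoints. A minimal client-side sketch of reading those headers, assuming a proxy running locally on the default port 4000 and a placeholder virtual key:

    import requests  # the proxy exposes an OpenAI-compatible REST surface

    # Hypothetical local deployment; adjust the base URL and key for your setup.
    PROXY_BASE = "http://0.0.0.0:4000"
    VIRTUAL_KEY = "sk-1234"  # placeholder virtual key

    resp = requests.post(
        f"{PROXY_BASE}/chat/completions",
        headers={"Authorization": f"Bearer {VIRTUAL_KEY}"},
        json={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "ping"}],
        },
    )

    # Headers set by this patch; the model metadata values fall back to "" when unknown.
    print("litellm version:", resp.headers.get("x-litellm-version"))
    print("model id:       ", resp.headers.get("x-litellm-model-id"))
    print("cache key:      ", resp.headers.get("x-litellm-cache-key"))
    print("model api base: ", resp.headers.get("x-litellm-model-api-base"))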