From 757baf3fd89e518398803b13eb552ffc1153c1ea Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 11 Jun 2024 18:45:07 -0700 Subject: [PATCH 001/655] fix allow setting UI _BASE path --- ui/litellm-dashboard/next.config.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/litellm-dashboard/next.config.mjs b/ui/litellm-dashboard/next.config.mjs index e1f8aa083..6e2924677 100644 --- a/ui/litellm-dashboard/next.config.mjs +++ b/ui/litellm-dashboard/next.config.mjs @@ -1,7 +1,7 @@ /** @type {import('next').NextConfig} */ const nextConfig = { output: 'export', - basePath: '/ui', + basePath: process.env.UI_BASE_PATH || '/ui', }; nextConfig.experimental = { From 870323b592efdb87b088c3fe1552423853354ca0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 11 Jun 2024 18:59:42 -0700 Subject: [PATCH 002/655] fix edit docker file ui base path --- build_admin_ui.sh | 110 +++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/build_admin_ui.sh b/build_admin_ui.sh index 5373ad0e3..897449023 100755 --- a/build_admin_ui.sh +++ b/build_admin_ui.sh @@ -7,56 +7,68 @@ echo pwd +# if UI_BASE_PATH env is set +if [ -z "$UI_BASE_PATH" ]; then -# only run this step for litellm enterprise, we run this if enterprise/enterprise_ui/_enterprise.json exists -if [ ! -f "enterprise/enterprise_ui/enterprise_colors.json" ]; then + +# only run this step for litellm enterprise, we run this if enterprise/enterprise_ui/_enterprise.json exists or env var UI_BASE_PATH is set +if [ -f "enterprise/enterprise_ui/enterprise_colors.json" ] || [ -n "${UI_BASE_PATH:-}" ]; then + echo "Building Admin UI..." + + # Install dependencies + # Check if we are on macOS + if [[ "$(uname)" == "Darwin" ]]; then + # Install dependencies using Homebrew + if ! command -v brew &> /dev/null; then + echo "Error: Homebrew not found. Please install Homebrew and try again." + exit 1 + fi + brew update + brew install curl + else + # Assume Linux, try using apt-get + if command -v apt-get &> /dev/null; then + apt-get update + apt-get install -y curl + elif command -v apk &> /dev/null; then + # Try using apk if apt-get is not available + apk update + apk add curl + else + echo "Error: Unsupported package manager. Cannot install dependencies." + exit 1 + fi + fi + curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash + source ~/.nvm/nvm.sh + nvm install v18.17.0 + nvm use v18.17.0 + npm install -g npm + + if [ -n "${UI_BASE_PATH:-}" ]; then + echo "Using UI_BASE_PATH: $UI_BASE_PATH" + + # make a file call .env in ui/litellm-dashboard and store the UI_BASE_PATH in it + echo "UI_BASE_PATH=$UI_BASE_PATH" > ui/litellm-dashboard/.env + + fi + + # copy _enterprise.json from this directory to /ui/litellm-dashboard, and rename it to ui_colors.json + cp enterprise/enterprise_ui/enterprise_colors.json ui/litellm-dashboard/ui_colors.json + + # cd in to /ui/litellm-dashboard + cd ui/litellm-dashboard + + # ensure have access to build_ui.sh + chmod +x ./build_ui.sh + + # run ./build_ui.sh + ./build_ui.sh + + # return to root directory + cd ../.. + +else echo "Admin UI - using default LiteLLM UI" exit 0 fi - -echo "Building Custom Admin UI..." - -# Install dependencies -# Check if we are on macOS -if [[ "$(uname)" == "Darwin" ]]; then - # Install dependencies using Homebrew - if ! command -v brew &> /dev/null; then - echo "Error: Homebrew not found. Please install Homebrew and try again." 
- exit 1 - fi - brew update - brew install curl -else - # Assume Linux, try using apt-get - if command -v apt-get &> /dev/null; then - apt-get update - apt-get install -y curl - elif command -v apk &> /dev/null; then - # Try using apk if apt-get is not available - apk update - apk add curl - else - echo "Error: Unsupported package manager. Cannot install dependencies." - exit 1 - fi -fi -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash -source ~/.nvm/nvm.sh -nvm install v18.17.0 -nvm use v18.17.0 -npm install -g npm - -# copy _enterprise.json from this directory to /ui/litellm-dashboard, and rename it to ui_colors.json -cp enterprise/enterprise_ui/enterprise_colors.json ui/litellm-dashboard/ui_colors.json - -# cd in to /ui/litellm-dashboard -cd ui/litellm-dashboard - -# ensure have access to build_ui.sh -chmod +x ./build_ui.sh - -# run ./build_ui.sh -./build_ui.sh - -# return to root directory -cd ../.. \ No newline at end of file From 42f9b1255afd0f1065863bc9c5156e5e464607c3 Mon Sep 17 00:00:00 2001 From: Daniel Bichuetti Date: Tue, 16 Jul 2024 09:19:19 -0300 Subject: [PATCH 003/655] Update model_prices_and_context_window.json --- model_prices_and_context_window.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 60f812b2b..c9a92972c 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -4086,7 +4086,7 @@ "litellm_provider": "perplexity", "mode": "chat" }, - "fireworks_ai/firefunction-v2": { + "fireworks_ai/accounts/fireworks/models/firefunction-v2": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -4097,7 +4097,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/mixtral-8x22b-instruct-hf": { + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, @@ -4108,7 +4108,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/qwen2-72b-instruct": { + "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, @@ -4119,7 +4119,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/yi-large": { + "fireworks_ai/accounts/fireworks/models/yi-large": { "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, @@ -4130,7 +4130,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/deepseek-coder-v2-instruct": { + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 8192, From 8b830358e3d9b941e9143ebc15944668859aacdb Mon Sep 17 00:00:00 2001 From: Daniel Bichuetti Date: Wed, 17 Jul 2024 08:19:36 -0300 Subject: [PATCH 004/655] Update utils.py Allow using custom models and on-demand deployments --- litellm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index 48fdf80c5..76292b3a4 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4417,7 +4417,7 @@ def get_llm_provider( dynamic_api_key = get_secret("DEEPSEEK_API_KEY") elif custom_llm_provider == "fireworks_ai": # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be 
https://api.fireworks.ai/inference/v1 - if not model.startswith("accounts/fireworks/models"): + if not model.startswith("accounts/"): model = f"accounts/fireworks/models/{model}" api_base = api_base or "https://api.fireworks.ai/inference/v1" dynamic_api_key = ( From 98bed2a24837756100c4518220f912d34680d3c1 Mon Sep 17 00:00:00 2001 From: maamalama Date: Wed, 24 Jul 2024 17:40:20 -0700 Subject: [PATCH 005/655] tools_call to Helicone --- litellm/integrations/helicone.py | 59 ++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index f0666431a..edab68c1c 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -17,27 +17,50 @@ class HeliconeLogger: self.key = os.getenv("HELICONE_API_KEY") def claude_mapping(self, model, messages, response_obj): - from anthropic import HUMAN_PROMPT, AI_PROMPT + from anthropic import HUMAN_PROMPT, AI_PROMPT - prompt = f"{HUMAN_PROMPT}" - for message in messages: - if "role" in message: - if message["role"] == "user": - prompt += f"{HUMAN_PROMPT}{message['content']}" - else: - prompt += f"{AI_PROMPT}{message['content']}" - else: - prompt += f"{HUMAN_PROMPT}{message['content']}" - prompt += f"{AI_PROMPT}" - claude_provider_request = {"model": model, "prompt": prompt} + prompt = f"{HUMAN_PROMPT}" + for message in messages: + if "role" in message: + if message["role"] == "user": + prompt += f"{HUMAN_PROMPT}{message['content']}" + else: + prompt += f"{AI_PROMPT}{message['content']}" + else: + prompt += f"{HUMAN_PROMPT}{message['content']}" + prompt += f"{AI_PROMPT}" + claude_provider_request = {"model": model, "prompt": prompt} - claude_response_obj = { - "completion": response_obj["choices"][0]["message"]["content"], - "model": model, - "stop_reason": "stop_sequence", - } + choice = response_obj["choices"][0] + message = choice["message"] - return claude_provider_request, claude_response_obj + content = [] + if "tool_calls" in message and message["tool_calls"]: + for tool_call in message["tool_calls"]: + content.append({ + "type": "tool_use", + "id": tool_call["id"], + "name": tool_call["function"]["name"], + "input": tool_call["function"]["arguments"] + }) + elif "content" in message and message["content"]: + content = [{"type": "text", "text": message["content"]}] + + claude_response_obj = { + "id": response_obj["id"], + "type": "message", + "role": "assistant", + "model": model, + "content": content, + "stop_reason": choice["finish_reason"], + "stop_sequence": None, + "usage": { + "input_tokens": response_obj["usage"]["prompt_tokens"], + "output_tokens": response_obj["usage"]["completion_tokens"] + } + } + + return claude_provider_request, claude_response_obj @staticmethod def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict: From 41811218b2e22b6fa5c07a655ec7e04a0e9e3a67 Mon Sep 17 00:00:00 2001 From: maamalama Date: Wed, 24 Jul 2024 17:42:07 -0700 Subject: [PATCH 006/655] tab --- litellm/integrations/helicone.py | 78 ++++++++++++++++---------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index edab68c1c..b00e14517 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -17,50 +17,50 @@ class HeliconeLogger: self.key = os.getenv("HELICONE_API_KEY") def claude_mapping(self, model, messages, response_obj): - from anthropic import HUMAN_PROMPT, AI_PROMPT + from anthropic import 
HUMAN_PROMPT, AI_PROMPT - prompt = f"{HUMAN_PROMPT}" - for message in messages: - if "role" in message: - if message["role"] == "user": - prompt += f"{HUMAN_PROMPT}{message['content']}" - else: - prompt += f"{AI_PROMPT}{message['content']}" - else: - prompt += f"{HUMAN_PROMPT}{message['content']}" - prompt += f"{AI_PROMPT}" - claude_provider_request = {"model": model, "prompt": prompt} + prompt = f"{HUMAN_PROMPT}" + for message in messages: + if "role" in message: + if message["role"] == "user": + prompt += f"{HUMAN_PROMPT}{message['content']}" + else: + prompt += f"{AI_PROMPT}{message['content']}" + else: + prompt += f"{HUMAN_PROMPT}{message['content']}" + prompt += f"{AI_PROMPT}" + claude_provider_request = {"model": model, "prompt": prompt} - choice = response_obj["choices"][0] - message = choice["message"] + choice = response_obj["choices"][0] + message = choice["message"] - content = [] - if "tool_calls" in message and message["tool_calls"]: - for tool_call in message["tool_calls"]: - content.append({ - "type": "tool_use", - "id": tool_call["id"], - "name": tool_call["function"]["name"], - "input": tool_call["function"]["arguments"] - }) - elif "content" in message and message["content"]: - content = [{"type": "text", "text": message["content"]}] + content = [] + if "tool_calls" in message and message["tool_calls"]: + for tool_call in message["tool_calls"]: + content.append({ + "type": "tool_use", + "id": tool_call["id"], + "name": tool_call["function"]["name"], + "input": tool_call["function"]["arguments"] + }) + elif "content" in message and message["content"]: + content = [{"type": "text", "text": message["content"]}] - claude_response_obj = { - "id": response_obj["id"], - "type": "message", - "role": "assistant", - "model": model, - "content": content, - "stop_reason": choice["finish_reason"], - "stop_sequence": None, - "usage": { - "input_tokens": response_obj["usage"]["prompt_tokens"], - "output_tokens": response_obj["usage"]["completion_tokens"] - } - } + claude_response_obj = { + "id": response_obj["id"], + "type": "message", + "role": "assistant", + "model": model, + "content": content, + "stop_reason": choice["finish_reason"], + "stop_sequence": None, + "usage": { + "input_tokens": response_obj["usage"]["prompt_tokens"], + "output_tokens": response_obj["usage"]["completion_tokens"] + } + } - return claude_provider_request, claude_response_obj + return claude_provider_request, claude_response_obj @staticmethod def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict: From 6ab2527fdcdf9cf0c94d129bc0bc2853a6f1f0d3 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 24 Jul 2024 18:14:49 -0700 Subject: [PATCH 007/655] feat(auth_check.py): support using redis cache for team objects Allows team update / check logic to work across instances instantly --- litellm/proxy/_new_secret_config.yaml | 5 +- litellm/proxy/auth/auth_checks.py | 24 ++++++- .../management_endpoints/team_endpoints.py | 2 + litellm/proxy/utils.py | 2 +- litellm/tests/test_proxy_server.py | 64 +++++++++++++++++++ 5 files changed, 92 insertions(+), 5 deletions(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index bec92c1e9..13babaac6 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -4,5 +4,6 @@ model_list: model: "openai/*" # passes our validation check that a real provider is given api_key: "" -general_settings: - completion_model: "gpt-3.5-turbo" \ No newline at end of file +litellm_settings: + 
cache: True + \ No newline at end of file diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index 91d4b1938..7c5356a37 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -370,10 +370,17 @@ async def _cache_team_object( team_id: str, team_table: LiteLLM_TeamTable, user_api_key_cache: DualCache, + proxy_logging_obj: Optional[ProxyLogging], ): key = "team_id:{}".format(team_id) await user_api_key_cache.async_set_cache(key=key, value=team_table) + ## UPDATE REDIS CACHE ## + if proxy_logging_obj is not None: + await proxy_logging_obj.internal_usage_cache.async_set_cache( + key=key, value=team_table + ) + @log_to_opentelemetry async def get_team_object( @@ -395,7 +402,17 @@ async def get_team_object( # check if in cache key = "team_id:{}".format(team_id) - cached_team_obj = await user_api_key_cache.async_get_cache(key=key) + + cached_team_obj: Optional[LiteLLM_TeamTable] = None + ## CHECK REDIS CACHE ## + if proxy_logging_obj is not None: + cached_team_obj = await proxy_logging_obj.internal_usage_cache.async_get_cache( + key=key + ) + + if cached_team_obj is None: + cached_team_obj = await user_api_key_cache.async_get_cache(key=key) + if cached_team_obj is not None: if isinstance(cached_team_obj, dict): return LiteLLM_TeamTable(**cached_team_obj) @@ -413,7 +430,10 @@ async def get_team_object( _response = LiteLLM_TeamTable(**response.dict()) # save the team object to cache await _cache_team_object( - team_id=team_id, team_table=_response, user_api_key_cache=user_api_key_cache + team_id=team_id, + team_table=_response, + user_api_key_cache=user_api_key_cache, + proxy_logging_obj=proxy_logging_obj, ) return _response diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py index 9ba76a203..9c20836d2 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm/proxy/management_endpoints/team_endpoints.py @@ -334,6 +334,7 @@ async def update_team( create_audit_log_for_update, litellm_proxy_admin_name, prisma_client, + proxy_logging_obj, user_api_key_cache, ) @@ -380,6 +381,7 @@ async def update_team( team_id=team_row.team_id, team_table=team_row, user_api_key_cache=user_api_key_cache, + proxy_logging_obj=proxy_logging_obj, ) # Enterprise Feature - Audit Logging. 
Enable with litellm.store_audit_logs = True diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index b08d7a30f..fc47abf9c 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -862,7 +862,7 @@ class PrismaClient: ) """ ) - if ret[0]['sum'] == 6: + if ret[0]["sum"] == 6: print("All necessary views exist!") # noqa return except Exception: diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index f3cb69a08..e088f2055 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -731,3 +731,67 @@ def test_load_router_config(mock_cache, fake_env_vars): # test_load_router_config() + + +@pytest.mark.asyncio +async def test_team_update_redis(): + """ + Tests if team update, updates the redis cache if set + """ + from litellm.caching import DualCache, RedisCache + from litellm.proxy._types import LiteLLM_TeamTable + from litellm.proxy.auth.auth_checks import _cache_team_object + + proxy_logging_obj: ProxyLogging = getattr( + litellm.proxy.proxy_server, "proxy_logging_obj" + ) + + proxy_logging_obj.internal_usage_cache.redis_cache = RedisCache() + + with patch.object( + proxy_logging_obj.internal_usage_cache.redis_cache, + "async_set_cache", + new=MagicMock(), + ) as mock_client: + await _cache_team_object( + team_id="1234", + team_table=LiteLLM_TeamTable(), + user_api_key_cache=DualCache(), + proxy_logging_obj=proxy_logging_obj, + ) + + mock_client.assert_called_once() + + +@pytest.mark.asyncio +async def test_get_team_redis(client_no_auth): + """ + Tests if get_team_object gets value from redis cache, if set + """ + from litellm.caching import DualCache, RedisCache + from litellm.proxy._types import LiteLLM_TeamTable + from litellm.proxy.auth.auth_checks import _cache_team_object, get_team_object + + proxy_logging_obj: ProxyLogging = getattr( + litellm.proxy.proxy_server, "proxy_logging_obj" + ) + + proxy_logging_obj.internal_usage_cache.redis_cache = RedisCache() + + with patch.object( + proxy_logging_obj.internal_usage_cache.redis_cache, + "async_get_cache", + new=AsyncMock(), + ) as mock_client: + try: + await get_team_object( + team_id="1234", + user_api_key_cache=DualCache(), + parent_otel_span=None, + proxy_logging_obj=proxy_logging_obj, + prisma_client=MagicMock(), + ) + except Exception as e: + pass + + mock_client.assert_called_once() From 3cd3491920fa64c9e7c9635478f05615d132cafb Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 24 Jul 2024 19:47:50 -0700 Subject: [PATCH 008/655] test: cleanup testing --- litellm/tests/test_completion.py | 37 ++++++++++++++++++++++++-------- litellm/tests/test_embedding.py | 19 +++++----------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 9061293d5..6aaf99515 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -2611,18 +2611,37 @@ def test_completion_azure_ad_token(): # If you want to remove it, speak to Ishaan! 
# Ishaan will be very disappointed if this test is removed -> this is a standard way to pass api_key + the router + proxy use this from httpx import Client - from openai import AzureOpenAI from litellm import completion - from litellm.llms.custom_httpx.httpx_handler import HTTPHandler - response = completion( - model="azure/chatgpt-v-2", - messages=messages, - # api_key="my-fake-ad-token", - azure_ad_token=os.getenv("AZURE_API_KEY"), - ) - print(response) + litellm.set_verbose = True + + old_key = os.environ["AZURE_API_KEY"] + os.environ.pop("AZURE_API_KEY", None) + + http_client = Client() + + with patch.object(http_client, "send", new=MagicMock()) as mock_client: + litellm.client_session = http_client + try: + response = completion( + model="azure/chatgpt-v-2", + messages=messages, + azure_ad_token="my-special-token", + ) + print(response) + except Exception as e: + pass + finally: + os.environ["AZURE_API_KEY"] = old_key + + mock_client.assert_called_once() + request = mock_client.call_args[0][0] + print(request.method) # This will print 'POST' + print(request.url) # This will print the full URL + print(request.headers) # This will print the full URL + auth_header = request.headers.get("Authorization") + assert auth_header == "Bearer my-special-token" def test_completion_azure_key_completion_arg(): diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index e6dd8bbb2..79ba8bc3e 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -206,6 +206,9 @@ def test_openai_azure_embedding_with_oidc_and_cf(): os.environ["AZURE_TENANT_ID"] = "17c0a27a-1246-4aa1-a3b6-d294e80e783c" os.environ["AZURE_CLIENT_ID"] = "4faf5422-b2bd-45e8-a6d7-46543a38acd0" + old_key = os.environ["AZURE_API_KEY"] + os.environ.pop("AZURE_API_KEY", None) + try: response = embedding( model="azure/text-embedding-ada-002", @@ -218,6 +221,8 @@ def test_openai_azure_embedding_with_oidc_and_cf(): except Exception as e: pytest.fail(f"Error occurred: {e}") + finally: + os.environ["AZURE_API_KEY"] = old_key def test_openai_azure_embedding_optional_arg(mocker): @@ -673,17 +678,3 @@ async def test_databricks_embeddings(sync_mode): # print(response) # local_proxy_embeddings() - - -def test_embedding_azure_ad_token(): - # this tests if we can pass api_key to completion, when it's not in the env. - # DO NOT REMOVE THIS TEST. No MATTER WHAT Happens! - # If you want to remove it, speak to Ishaan! 
- # Ishaan will be very disappointed if this test is removed -> this is a standard way to pass api_key + the router + proxy use this - - response = embedding( - model="azure/azure-embedding-model", - input=["good morning from litellm"], - azure_ad_token=os.getenv("AZURE_API_KEY"), - ) - print(response) From 4cd96976b38ec94164d1064f537efa22bd0f1553 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 24 Jul 2024 20:46:56 -0700 Subject: [PATCH 009/655] feat - add groq/llama-3.1 --- ...odel_prices_and_context_window_backup.json | 30 +++++++++++++++++++ model_prices_and_context_window.json | 30 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 08bc292c9..428d95589 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1094,6 +1094,36 @@ "mode": "chat", "supports_function_calling": true }, + "groq/llama-3.1-8b-instant": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, + "groq/llama-3.1-70b-versatile": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, + "groq/llama-3.1-405b-reasoning": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, "groq/mixtral-8x7b-32768": { "max_tokens": 32768, "max_input_tokens": 32768, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 08bc292c9..428d95589 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1094,6 +1094,36 @@ "mode": "chat", "supports_function_calling": true }, + "groq/llama-3.1-8b-instant": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, + "groq/llama-3.1-70b-versatile": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, + "groq/llama-3.1-405b-reasoning": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true + }, "groq/mixtral-8x7b-32768": { "max_tokens": 32768, "max_input_tokens": 32768, From c08d4ca9ec2e0f6cb8ffb725a23d4a899e5cd181 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 24 Jul 2024 20:49:28 -0700 Subject: [PATCH 010/655] docs groq models --- docs/my-website/docs/providers/groq.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/groq.md b/docs/my-website/docs/providers/groq.md index bfb944cb4..37d63d031 100644 --- 
a/docs/my-website/docs/providers/groq.md +++ b/docs/my-website/docs/providers/groq.md @@ -148,8 +148,11 @@ print(response) ## Supported Models - ALL Groq Models Supported! We support ALL Groq models, just set `groq/` as a prefix when sending completion requests -| Model Name | Function Call | +| Model Name | Usage | |--------------------|---------------------------------------------------------| +| llama-3.1-8b-instant | `completion(model="groq/llama-3.1-8b-instant", messages)` | +| llama-3.1-70b-versatile | `completion(model="groq/llama-3.1-70b-versatile", messages)` | +| llama-3.1-405b-reasoning | `completion(model="groq/llama-3.1-405b-reasoning", messages)` | | llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` | | llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` | | llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` | From d5a7c654f191a5d320bdbe875941d15bbe18d28b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 24 Jul 2024 21:25:31 -0700 Subject: [PATCH 011/655] =?UTF-8?q?bump:=20version=201.42.0=20=E2=86=92=20?= =?UTF-8?q?1.42.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 10246abd7..08a41c9ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.0" +version = "1.42.1" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.0" +version = "1.42.1" version_files = [ "pyproject.toml:^version" ] From c77abaa07f274774e1497dced7d59e2df33cddb0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 24 Jul 2024 21:31:41 -0700 Subject: [PATCH 012/655] feat - add mistral large 2 --- ...odel_prices_and_context_window_backup.json | 20 ++++++++++++++----- model_prices_and_context_window.json | 20 ++++++++++++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 428d95589..667745c30 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -893,11 +893,11 @@ "mode": "chat" }, "mistral/mistral-large-latest": { - "max_tokens": 8191, - "max_input_tokens": 32000, - "max_output_tokens": 8191, - "input_cost_per_token": 0.000004, - "output_cost_per_token": 0.000012, + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true @@ -912,6 +912,16 @@ "mode": "chat", "supports_function_calling": true }, + "mistral/mistral-large-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true + }, "mistral/open-mistral-7b": { "max_tokens": 8191, "max_input_tokens": 32000, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 428d95589..667745c30 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -893,11 
+893,11 @@ "mode": "chat" }, "mistral/mistral-large-latest": { - "max_tokens": 8191, - "max_input_tokens": 32000, - "max_output_tokens": 8191, - "input_cost_per_token": 0.000004, - "output_cost_per_token": 0.000012, + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true @@ -912,6 +912,16 @@ "mode": "chat", "supports_function_calling": true }, + "mistral/mistral-large-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true + }, "mistral/open-mistral-7b": { "max_tokens": 8191, "max_input_tokens": 32000, From a92a2ca382403f47dbb5f588c9f855b5d7b3d9a5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 24 Jul 2024 21:35:34 -0700 Subject: [PATCH 013/655] docs add mistral api large 2 --- docs/my-website/docs/providers/mistral.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/mistral.md b/docs/my-website/docs/providers/mistral.md index 21e3a9d54..62a91c687 100644 --- a/docs/my-website/docs/providers/mistral.md +++ b/docs/my-website/docs/providers/mistral.md @@ -148,7 +148,8 @@ All models listed here https://docs.mistral.ai/platform/endpoints are supported. |----------------|--------------------------------------------------------------| | Mistral Small | `completion(model="mistral/mistral-small-latest", messages)` | | Mistral Medium | `completion(model="mistral/mistral-medium-latest", messages)`| -| Mistral Large | `completion(model="mistral/mistral-large-latest", messages)` | +| Mistral Large 2 | `completion(model="mistral/mistral-large-2407", messages)` | +| Mistral Large Latest | `completion(model="mistral/mistral-large-latest", messages)` | | Mistral 7B | `completion(model="mistral/open-mistral-7b", messages)` | | Mixtral 8x7B | `completion(model="mistral/open-mixtral-8x7b", messages)` | | Mixtral 8x22B | `completion(model="mistral/open-mixtral-8x22b", messages)` | From b376ee71b01e3e8c6453a3dd21421b365aaaf9f8 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 24 Jul 2024 21:51:24 -0700 Subject: [PATCH 014/655] fix(internal_user_endpoints.py): support updating budgets for `/user/update` --- .../proxy/management_endpoints/internal_user_endpoints.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 280ff2ad2..b132761ae 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -27,6 +27,7 @@ from litellm._logging import verbose_proxy_logger from litellm.proxy._types import * from litellm.proxy.auth.user_api_key_auth import user_api_key_auth from litellm.proxy.management_endpoints.key_management_endpoints import ( + _duration_in_seconds, generate_key_helper_fn, ) from litellm.proxy.management_helpers.utils import ( @@ -486,6 +487,13 @@ async def user_update( ): # models default to [], spend defaults to 0, we should not reset these values non_default_values[k] = v + if "budget_duration" in non_default_values: + duration_s = _duration_in_seconds( + duration=non_default_values["budget_duration"] + ) + user_reset_at = 
datetime.now(timezone.utc) + timedelta(seconds=duration_s) + non_default_values["budget_reset_at"] = user_reset_at + ## ADD USER, IF NEW ## verbose_proxy_logger.debug("/user/update: Received data = %s", data) if data.user_id is not None and len(data.user_id) > 0: From 4e51f712f3c4982833255782a4c70961f3b8b56a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 09:57:19 -0700 Subject: [PATCH 015/655] fix(main.py): fix calling openai gpt-3.5-turbo-instruct via /completions Fixes https://github.com/BerriAI/litellm/issues/749 --- litellm/main.py | 10 ++++++---- litellm/proxy/_new_secret_config.yaml | 8 ++------ litellm/tests/test_get_llm_provider.py | 14 ++++++++++++-- litellm/tests/test_text_completion.py | 21 ++++++++++++++++++++- litellm/utils.py | 2 +- 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 35fad5e02..f724a68bd 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -3833,7 +3833,7 @@ def text_completion( optional_params["custom_llm_provider"] = custom_llm_provider # get custom_llm_provider - _, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore + _model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore if custom_llm_provider == "huggingface": # if echo == True, for TGI llms we need to set top_n_tokens to 3 @@ -3916,10 +3916,12 @@ def text_completion( kwargs.pop("prompt", None) - if model is not None and model.startswith( - "openai/" + if ( + _model is not None and custom_llm_provider == "openai" ): # for openai compatible endpoints - e.g. vllm, call the native /v1/completions endpoint for text completion calls - model = model.replace("openai/", "text-completion-openai/") + if _model not in litellm.open_ai_chat_completion_models: + model = "text-completion-openai/" + _model + optional_params.pop("custom_llm_provider", None) kwargs["text_completion"] = True response = completion( diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index bec92c1e9..c4d2a6441 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,8 +1,4 @@ model_list: - - model_name: "*" # all requests where model not in your config go to this deployment + - model_name: "test-model" litellm_params: - model: "openai/*" # passes our validation check that a real provider is given - api_key: "" - -general_settings: - completion_model: "gpt-3.5-turbo" \ No newline at end of file + model: "openai/gpt-3.5-turbo-instruct-0914" \ No newline at end of file diff --git a/litellm/tests/test_get_llm_provider.py b/litellm/tests/test_get_llm_provider.py index e443830b2..3ec867af4 100644 --- a/litellm/tests/test_get_llm_provider.py +++ b/litellm/tests/test_get_llm_provider.py @@ -1,14 +1,18 @@ -import sys, os +import os +import sys import traceback + from dotenv import load_dotenv load_dotenv() -import os, io +import io +import os sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import pytest + import litellm @@ -21,6 +25,12 @@ def test_get_llm_provider(): # test_get_llm_provider() +def test_get_llm_provider_gpt_instruct(): + _, response, _, _ = litellm.get_llm_provider(model="gpt-3.5-turbo-instruct-0914") + + assert response == "text-completion-openai" + + def test_get_llm_provider_mistral_custom_api_base(): model, 
custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider( model="mistral/mistral-large-fr", diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py index c6bbf71f2..6a0080b37 100644 --- a/litellm/tests/test_text_completion.py +++ b/litellm/tests/test_text_completion.py @@ -3840,7 +3840,26 @@ def test_completion_chatgpt_prompt(): try: print("\n gpt3.5 test\n") response = text_completion( - model="gpt-3.5-turbo", prompt="What's the weather in SF?" + model="openai/gpt-3.5-turbo", prompt="What's the weather in SF?" + ) + print(response) + response_str = response["choices"][0]["text"] + print("\n", response.choices) + print("\n", response.choices[0]) + # print(response.choices[0].text) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_chatgpt_prompt() + + +def test_completion_gpt_instruct(): + try: + response = text_completion( + model="gpt-3.5-turbo-instruct-0914", + prompt="What's the weather in SF?", + custom_llm_provider="openai", ) print(response) response_str = response["choices"][0]["text"] diff --git a/litellm/utils.py b/litellm/utils.py index f35f1ce4b..e104de958 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2774,7 +2774,7 @@ def get_optional_params( tool_function["parameters"] = new_parameters def _check_valid_arg(supported_params): - verbose_logger.debug( + verbose_logger.info( f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}" ) verbose_logger.debug( From 80800b9ec86478003dffe58fd433259fcdd0f021 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 10:01:47 -0700 Subject: [PATCH 016/655] docs(caching.md): update caching docs to include ttl info --- docs/my-website/docs/proxy/caching.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/my-website/docs/proxy/caching.md b/docs/my-website/docs/proxy/caching.md index 6769ec6c5..ded8333f0 100644 --- a/docs/my-website/docs/proxy/caching.md +++ b/docs/my-website/docs/proxy/caching.md @@ -59,6 +59,8 @@ litellm_settings: cache_params: # set cache params for redis type: redis ttl: 600 # will be cached on redis for 600s + # default_in_memory_ttl: Optional[float], default is None. time in seconds. + # default_in_redis_ttl: Optional[float], default is None. time in seconds. 
``` @@ -613,6 +615,11 @@ litellm_settings: ```yaml cache_params: + # ttl + ttl: Optional[float] + default_in_memory_ttl: Optional[float] + default_in_redis_ttl: Optional[float] + # Type of cache (options: "local", "redis", "s3") type: s3 @@ -628,6 +635,8 @@ cache_params: host: localhost # Redis server hostname or IP address port: "6379" # Redis server port (as a string) password: secret_password # Redis server password + namespace: Optional[str] = None, + # S3 cache parameters s3_bucket_name: your_s3_bucket_name # Name of the S3 bucket From 5553f84d511fc352dc95cbf49ad752eefbfeefa5 Mon Sep 17 00:00:00 2001 From: fracapuano Date: Thu, 25 Jul 2024 19:06:07 +0200 Subject: [PATCH 017/655] fix: now supports single tokens prediction --- litellm/llms/replicate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py index 1dd29fd7d..0d129ce02 100644 --- a/litellm/llms/replicate.py +++ b/litellm/llms/replicate.py @@ -387,7 +387,7 @@ def process_response( result = " " ## Building RESPONSE OBJECT - if len(result) > 1: + if len(result) >= 1: model_response.choices[0].message.content = result # type: ignore # Calculate usage From d91b01a24bf72eec6e38a4325bddc1a2e78a1faa Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 10:08:40 -0700 Subject: [PATCH 018/655] docs(enterprise.md): cleanup docs --- docs/my-website/docs/proxy/enterprise.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index 5b97dc14e..01bc32783 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -25,7 +25,7 @@ Features: - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests) - **Spend Tracking** - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags) - - ✅ [API Endpoints to get Spend Reports per Team, API Key, Customer](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) + - ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) - **Advanced Metrics** - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens) - **Guardrails, PII Masking, Content Moderation** From 397451570e3a97ef12564bb4745828606d5077aa Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 10:09:02 -0700 Subject: [PATCH 019/655] docs(enterprise.md): cleanup docs --- docs/my-website/docs/proxy/enterprise.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index 01bc32783..3607cb07f 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -23,7 +23,7 @@ Features: - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints) - ✅ Set Max Request / File Size on Requests - ✅ [Enforce Required Params for LLM Requests (ex. 
Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests) -- **Spend Tracking** +- **Enterprise Spend Tracking Features** - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags) - ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) - **Advanced Metrics** From a3e4e4d4129accf402356cf9cdebea3a5f0454da Mon Sep 17 00:00:00 2001 From: maamalama Date: Thu, 25 Jul 2024 11:37:29 -0700 Subject: [PATCH 020/655] anthropic gateway fixes --- litellm/integrations/helicone.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index b00e14517..56c9c38bc 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -60,7 +60,7 @@ class HeliconeLogger: } } - return claude_provider_request, claude_response_obj + return claude_response_obj @staticmethod def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict: @@ -119,7 +119,7 @@ class HeliconeLogger: response_obj = response_obj.json() if "claude" in model: - provider_request, response_obj = self.claude_mapping( + response_obj = self.claude_mapping( model=model, messages=messages, response_obj=response_obj ) @@ -130,7 +130,11 @@ class HeliconeLogger: } # Code to be executed + provider_url = self.provider_url url = "https://api.hconeai.com/oai/v1/log" + if "claude" in model: + url = "https://api.hconeai.com/anthropic/v1/log" + provider_url = "https://api.anthropic.com/v1/messages" headers = { "Authorization": f"Bearer {self.key}", "Content-Type": "application/json", @@ -147,7 +151,7 @@ class HeliconeLogger: meta.update(metadata) data = { "providerRequest": { - "url": self.provider_url, + "url": provider_url, "json": provider_request, "meta": meta, }, From 3293ad745805b65b10d42f26477888d27f462f5c Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Thu, 25 Jul 2024 19:29:55 +0000 Subject: [PATCH 021/655] Add Llama 3.1 405b for Bedrock --- litellm/llms/bedrock_httpx.py | 1 + litellm/model_prices_and_context_window_backup.json | 9 +++++++++ model_prices_and_context_window.json | 9 +++++++++ 3 files changed, 19 insertions(+) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 16c3f60b7..3f06a50b8 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -78,6 +78,7 @@ BEDROCK_CONVERSE_MODELS = [ "ai21.jamba-instruct-v1:0", "meta.llama3-1-8b-instruct-v1:0", "meta.llama3-1-70b-instruct-v1:0", + "meta.llama3-1-405b-instruct-v1:0", ] diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 667745c30..c05256d34 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3731,6 +3731,15 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "meta.llama3-1-405b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, + "litellm_provider": "bedrock", + "mode": "chat" + }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "max_tokens": 77, "max_input_tokens": 77, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 667745c30..c05256d34 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3731,6 +3731,15 @@ "litellm_provider": "bedrock", "mode": 
"chat" }, + "meta.llama3-1-405b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, + "litellm_provider": "bedrock", + "mode": "chat" + }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "max_tokens": 77, "max_input_tokens": 77, From 5c4ee3ef3c042b40b438e87b22b563cc716afa6a Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Thu, 25 Jul 2024 20:00:29 +0000 Subject: [PATCH 022/655] Add mistral.mistral-large-2407-v1:0 on Amazon Bedrock. --- litellm/llms/bedrock_httpx.py | 1 + litellm/model_prices_and_context_window_backup.json | 9 +++++++++ model_prices_and_context_window.json | 9 +++++++++ 3 files changed, 19 insertions(+) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 16c3f60b7..59b8acad0 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -78,6 +78,7 @@ BEDROCK_CONVERSE_MODELS = [ "ai21.jamba-instruct-v1:0", "meta.llama3-1-8b-instruct-v1:0", "meta.llama3-1-70b-instruct-v1:0", + "mistral.mistral-large-2407-v1:0", ] diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 667745c30..66a5565f3 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2996,6 +2996,15 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "mistral.mistral-large-2407-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "bedrock", + "mode": "chat" + }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { "max_tokens": 8191, "max_input_tokens": 32000, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 667745c30..66a5565f3 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2996,6 +2996,15 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "mistral.mistral-large-2407-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "bedrock", + "mode": "chat" + }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { "max_tokens": 8191, "max_input_tokens": 32000, From 22c66991ed671a544bbf2df6aa6bd0bef1122b34 Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Thu, 25 Jul 2024 20:36:03 +0000 Subject: [PATCH 023/655] Support tool calling for Llama 3.1 on Amazon bedrock. --- litellm/llms/bedrock_httpx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 3f06a50b8..cb3832845 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -1316,6 +1316,7 @@ class AmazonConverseConfig: model.startswith("anthropic") or model.startswith("mistral") or model.startswith("cohere") + or model.startswith("meta.llama3-1") ): supported_params.append("tools") From 64adae6e7fd57a89e7c4693d833c705e169ac579 Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Thu, 25 Jul 2024 21:06:58 +0000 Subject: [PATCH 024/655] Check for converse support first. 
--- litellm/utils.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index e104de958..a597643a6 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3121,7 +3121,19 @@ def get_optional_params( supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider ) - if "ai21" in model: + if model in litellm.BEDROCK_CONVERSE_MODELS: + _check_valid_arg(supported_params=supported_params) + optional_params = litellm.AmazonConverseConfig().map_openai_params( + model=model, + non_default_params=non_default_params, + optional_params=optional_params, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), + ) + elif "ai21" in model: _check_valid_arg(supported_params=supported_params) # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[], # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra @@ -3143,17 +3155,6 @@ def get_optional_params( optional_params=optional_params, ) ) - elif model in litellm.BEDROCK_CONVERSE_MODELS: - optional_params = litellm.AmazonConverseConfig().map_openai_params( - model=model, - non_default_params=non_default_params, - optional_params=optional_params, - drop_params=( - drop_params - if drop_params is not None and isinstance(drop_params, bool) - else False - ), - ) else: optional_params = litellm.AmazonAnthropicConfig().map_openai_params( non_default_params=non_default_params, From bfdda089c8ab36c0920c234ce890c4aba9bea447 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 14:23:07 -0700 Subject: [PATCH 025/655] fix(proxy_server.py): check if input list > 0 before indexing into it resolves 'list index out of range' error --- litellm/proxy/_new_secret_config.yaml | 2 +- litellm/proxy/proxy_server.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index c4d2a6441..a81d133e5 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,4 +1,4 @@ model_list: - model_name: "test-model" litellm_params: - model: "openai/gpt-3.5-turbo-instruct-0914" \ No newline at end of file + model: "openai/text-embedding-ada-002" \ No newline at end of file diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 106b95453..f22f25f73 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3334,6 +3334,7 @@ async def embeddings( if ( "input" in data and isinstance(data["input"], list) + and len(data["input"]) > 0 and isinstance(data["input"][0], list) and isinstance(data["input"][0][0], int) ): # check if array of tokens passed in @@ -3464,8 +3465,8 @@ async def embeddings( litellm_debug_info, ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format( - str(e) + "litellm.proxy.proxy_server.embeddings(): Exception occured - {}\n{}".format( + str(e), traceback.format_exc() ) ) verbose_proxy_logger.debug(traceback.format_exc()) From 711496e2600adc8510d6f0cdc8f0a482856b3e4c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 14:30:46 -0700 Subject: [PATCH 026/655] fix(router.py): add support for diskcache to router --- litellm/router.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index 11ad5fd9e..53013a759 100644 --- 
a/litellm/router.py +++ b/litellm/router.py @@ -263,7 +263,9 @@ class Router: ) # names of models under litellm_params. ex. azure/chatgpt-v-2 self.deployment_latency_map = {} ### CACHING ### - cache_type: Literal["local", "redis"] = "local" # default to an in-memory cache + cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = ( + "local" # default to an in-memory cache + ) redis_cache = None cache_config = {} self.client_ttl = client_ttl From 6bf1b9353bbc675390cac2a5821eaa76a4788c28 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 15:33:05 -0700 Subject: [PATCH 027/655] feat(custom_llm.py): initial working commit for writing your own custom LLM handler Fixes https://github.com/BerriAI/litellm/issues/4675 Also Addresses https://github.com/BerriAI/litellm/discussions/4677 --- litellm/__init__.py | 9 ++++ litellm/llms/custom_llm.py | 70 ++++++++++++++++++++++++++++++++ litellm/main.py | 15 +++++++ litellm/tests/test_custom_llm.py | 63 ++++++++++++++++++++++++++++ litellm/types/llms/custom_llm.py | 10 +++++ litellm/utils.py | 16 ++++++++ 6 files changed, 183 insertions(+) create mode 100644 litellm/llms/custom_llm.py create mode 100644 litellm/tests/test_custom_llm.py create mode 100644 litellm/types/llms/custom_llm.py diff --git a/litellm/__init__.py b/litellm/__init__.py index 956834afc..0527ef199 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -813,6 +813,7 @@ from .utils import ( ) from .types.utils import ImageObject +from .llms.custom_llm import CustomLLM from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig @@ -909,3 +910,11 @@ from .cost_calculator import response_cost_calculator, cost_per_token from .types.adapter import AdapterItem adapters: List[AdapterItem] = [] + +### CUSTOM LLMs ### +from .types.llms.custom_llm import CustomLLMItem + +custom_provider_map: List[CustomLLMItem] = [] +_custom_providers: List[str] = ( + [] +) # internal helper util, used to track names of custom providers diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py new file mode 100644 index 000000000..fac1eb293 --- /dev/null +++ b/litellm/llms/custom_llm.py @@ -0,0 +1,70 @@ +# What is this? 
+## Handler file for a Custom Chat LLM + +""" +- completion +- acompletion +- streaming +- async_streaming +""" + +import copy +import json +import os +import time +import types +from enum import Enum +from functools import partial +from typing import Callable, List, Literal, Optional, Tuple, Union + +import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.databricks import GenericStreamingChunk +from litellm.types.utils import ProviderField +from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage + +from .base import BaseLLM +from .prompt_templates.factory import custom_prompt, prompt_factory + + +class CustomLLMError(Exception): # use this for all your exceptions + def __init__( + self, + status_code, + message, + ): + self.status_code = status_code + self.message = message + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs + + +def custom_chat_llm_router(): + """ + Routes call to CustomLLM completion/acompletion/streaming/astreaming functions, based on call type + + Validates if response is in expected format + """ + pass + + +class CustomLLM(BaseLLM): + def __init__(self) -> None: + super().__init__() + + def completion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + def streaming(self, *args, **kwargs): + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def acompletion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def astreaming(self, *args, **kwargs): + raise CustomLLMError(status_code=500, message="Not implemented yet!") diff --git a/litellm/main.py b/litellm/main.py index f724a68bd..539c3d3e1 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -107,6 +107,7 @@ from .llms.anthropic_text import AnthropicTextCompletion from .llms.azure import AzureChatCompletion from .llms.azure_text import AzureTextCompletion from .llms.bedrock_httpx import BedrockConverseLLM, BedrockLLM +from .llms.custom_llm import CustomLLM, custom_chat_llm_router from .llms.databricks import DatabricksChatCompletion from .llms.huggingface_restapi import Huggingface from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion @@ -2690,6 +2691,20 @@ def completion( model_response.created = int(time.time()) model_response.model = model response = model_response + elif ( + custom_llm_provider in litellm._custom_providers + ): # Assume custom LLM provider + # Get the Custom Handler + custom_handler: Optional[CustomLLM] = None + for item in litellm.custom_provider_map: + if item["provider"] == custom_llm_provider: + custom_handler = item["custom_handler"] + + if custom_handler is None: + raise ValueError( + f"Unable to map your input to a model. Check your input - {args}" + ) + response = custom_handler.completion() else: raise ValueError( f"Unable to map your input to a model. Check your input - {args}" diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py new file mode 100644 index 000000000..0506986eb --- /dev/null +++ b/litellm/tests/test_custom_llm.py @@ -0,0 +1,63 @@ +# What is this? 
+## Unit tests for the CustomLLM class + + +import asyncio +import os +import sys +import time +import traceback + +import openai +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import os +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +from dotenv import load_dotenv + +import litellm +from litellm import CustomLLM, completion, get_llm_provider + + +class MyCustomLLM(CustomLLM): + def completion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + +def test_get_llm_provider(): + from litellm.utils import custom_llm_setup + + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + + custom_llm_setup() + + model, provider, _, _ = get_llm_provider(model="custom_llm/my-fake-model") + + assert provider == "custom_llm" + + +def test_simple_completion(): + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + resp = completion( + model="custom_llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + ) + + assert resp.choices[0].message.content == "Hi!" diff --git a/litellm/types/llms/custom_llm.py b/litellm/types/llms/custom_llm.py new file mode 100644 index 000000000..d5499a419 --- /dev/null +++ b/litellm/types/llms/custom_llm.py @@ -0,0 +1,10 @@ +from typing import List + +from typing_extensions import Dict, Required, TypedDict, override + +from litellm.llms.custom_llm import CustomLLM + + +class CustomLLMItem(TypedDict): + provider: str + custom_handler: CustomLLM diff --git a/litellm/utils.py b/litellm/utils.py index e104de958..0f1b0315d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -330,6 +330,18 @@ class Rules: ####### CLIENT ################### # make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking +def custom_llm_setup(): + """ + Add custom_llm provider to provider list + """ + for custom_llm in litellm.custom_provider_map: + if custom_llm["provider"] not in litellm.provider_list: + litellm.provider_list.append(custom_llm["provider"]) + + if custom_llm["provider"] not in litellm._custom_providers: + litellm._custom_providers.append(custom_llm["provider"]) + + def function_setup( original_function: str, rules_obj, start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. 
@@ -341,6 +353,10 @@ def function_setup( try: global callback_list, add_breadcrumb, user_logger_fn, Logging + ## CUSTOM LLM SETUP ## + custom_llm_setup() + + ## LOGGING SETUP function_id = kwargs["id"] if "id" in kwargs else None if len(litellm.callbacks) > 0: From 9f97436308de5c1ddc1acf14567b0caf0c23ab2d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 15:51:39 -0700 Subject: [PATCH 028/655] fix(custom_llm.py): support async completion calls --- litellm/llms/custom_llm.py | 26 +++++++++++++++++--------- litellm/main.py | 10 +++++++++- litellm/tests/test_custom_llm.py | 25 ++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py index fac1eb293..5e9933194 100644 --- a/litellm/llms/custom_llm.py +++ b/litellm/llms/custom_llm.py @@ -44,15 +44,6 @@ class CustomLLMError(Exception): # use this for all your exceptions ) # Call the base class constructor with the parameters it needs -def custom_chat_llm_router(): - """ - Routes call to CustomLLM completion/acompletion/streaming/astreaming functions, based on call type - - Validates if response is in expected format - """ - pass - - class CustomLLM(BaseLLM): def __init__(self) -> None: super().__init__() @@ -68,3 +59,20 @@ class CustomLLM(BaseLLM): async def astreaming(self, *args, **kwargs): raise CustomLLMError(status_code=500, message="Not implemented yet!") + + +def custom_chat_llm_router( + async_fn: bool, stream: Optional[bool], custom_llm: CustomLLM +): + """ + Routes call to CustomLLM completion/acompletion/streaming/astreaming functions, based on call type + + Validates if response is in expected format + """ + if async_fn: + if stream: + return custom_llm.astreaming + return custom_llm.acompletion + if stream: + return custom_llm.streaming + return custom_llm.completion diff --git a/litellm/main.py b/litellm/main.py index 539c3d3e1..51e7c611c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -382,6 +382,7 @@ async def acompletion( or custom_llm_provider == "clarifai" or custom_llm_provider == "watsonx" or custom_llm_provider in litellm.openai_compatible_providers + or custom_llm_provider in litellm._custom_providers ): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all. init_response = await loop.run_in_executor(None, func_with_context) if isinstance(init_response, dict) or isinstance( @@ -2704,7 +2705,14 @@ def completion( raise ValueError( f"Unable to map your input to a model. Check your input - {args}" ) - response = custom_handler.completion() + + ## ROUTE LLM CALL ## + handler_fn = custom_chat_llm_router( + async_fn=acompletion, stream=stream, custom_llm=custom_handler + ) + + ## CALL FUNCTION + response = handler_fn() else: raise ValueError( f"Unable to map your input to a model. 
Check your input - {args}" diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py index 0506986eb..fd46c892e 100644 --- a/litellm/tests/test_custom_llm.py +++ b/litellm/tests/test_custom_llm.py @@ -23,7 +23,7 @@ import httpx from dotenv import load_dotenv import litellm -from litellm import CustomLLM, completion, get_llm_provider +from litellm import CustomLLM, acompletion, completion, get_llm_provider class MyCustomLLM(CustomLLM): @@ -35,6 +35,15 @@ class MyCustomLLM(CustomLLM): ) # type: ignore +class MyCustomAsyncLLM(CustomLLM): + async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + def test_get_llm_provider(): from litellm.utils import custom_llm_setup @@ -61,3 +70,17 @@ def test_simple_completion(): ) assert resp.choices[0].message.content == "Hi!" + + +@pytest.mark.asyncio +async def test_simple_acompletion(): + my_custom_llm = MyCustomAsyncLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + resp = await acompletion( + model="custom_llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + ) + + assert resp.choices[0].message.content == "Hi!" From b4e3a77ad0b823fb5ab44f6ee92a48e2b929993d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 16:47:32 -0700 Subject: [PATCH 029/655] feat(utils.py): support sync streaming for custom llm provider --- litellm/__init__.py | 1 + litellm/llms/custom_llm.py | 19 ++++-- litellm/main.py | 8 +++ litellm/tests/test_custom_llm.py | 111 +++++++++++++++++++++++++++++-- litellm/utils.py | 10 ++- 5 files changed, 139 insertions(+), 10 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 0527ef199..b6aacad1a 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -913,6 +913,7 @@ adapters: List[AdapterItem] = [] ### CUSTOM LLMs ### from .types.llms.custom_llm import CustomLLMItem +from .types.utils import GenericStreamingChunk custom_provider_map: List[CustomLLMItem] = [] _custom_providers: List[str] = ( diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py index 5e9933194..f00d02ab7 100644 --- a/litellm/llms/custom_llm.py +++ b/litellm/llms/custom_llm.py @@ -15,7 +15,17 @@ import time import types from enum import Enum from functools import partial -from typing import Callable, List, Literal, Optional, Tuple, Union +from typing import ( + Any, + AsyncIterator, + Callable, + Iterator, + List, + Literal, + Optional, + Tuple, + Union, +) import httpx # type: ignore import requests # type: ignore @@ -23,8 +33,7 @@ import requests # type: ignore import litellm from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.types.llms.databricks import GenericStreamingChunk -from litellm.types.utils import ProviderField +from litellm.types.utils import GenericStreamingChunk, ProviderField from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage from .base import BaseLLM @@ -51,13 +60,13 @@ class CustomLLM(BaseLLM): def completion(self, *args, **kwargs) -> ModelResponse: raise CustomLLMError(status_code=500, message="Not implemented yet!") - def streaming(self, *args, **kwargs): + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: raise CustomLLMError(status_code=500, 
message="Not implemented yet!") async def acompletion(self, *args, **kwargs) -> ModelResponse: raise CustomLLMError(status_code=500, message="Not implemented yet!") - async def astreaming(self, *args, **kwargs): + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: raise CustomLLMError(status_code=500, message="Not implemented yet!") diff --git a/litellm/main.py b/litellm/main.py index 51e7c611c..c3be01373 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2713,6 +2713,14 @@ def completion( ## CALL FUNCTION response = handler_fn() + if stream is True: + return CustomStreamWrapper( + completion_stream=response, + model=model, + custom_llm_provider=custom_llm_provider, + logging_obj=logging, + ) + else: raise ValueError( f"Unable to map your input to a model. Check your input - {args}" diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py index fd46c892e..4cc355e4b 100644 --- a/litellm/tests/test_custom_llm.py +++ b/litellm/tests/test_custom_llm.py @@ -17,13 +17,80 @@ sys.path.insert( import os from collections import defaultdict from concurrent.futures import ThreadPoolExecutor +from typing import Any, AsyncIterator, Iterator, Union from unittest.mock import AsyncMock, MagicMock, patch import httpx from dotenv import load_dotenv import litellm -from litellm import CustomLLM, acompletion, completion, get_llm_provider +from litellm import ( + ChatCompletionDeltaChunk, + ChatCompletionUsageBlock, + CustomLLM, + GenericStreamingChunk, + ModelResponse, + acompletion, + completion, + get_llm_provider, +) +from litellm.utils import ModelResponseIterator + + +class CustomModelResponseIterator: + def __init__(self, streaming_response: Union[Iterator, AsyncIterator]): + self.streaming_response = streaming_response + + def chunk_parser(self, chunk: Any) -> GenericStreamingChunk: + return GenericStreamingChunk( + text="hello world", + tool_use=None, + is_finished=True, + finish_reason="stop", + usage=ChatCompletionUsageBlock( + prompt_tokens=10, completion_tokens=20, total_tokens=30 + ), + index=0, + ) + + # Sync iterator + def __iter__(self): + return self + + def __next__(self) -> GenericStreamingChunk: + try: + chunk: Any = self.streaming_response.__next__() # type: ignore + except StopIteration: + raise StopIteration + except ValueError as e: + raise RuntimeError(f"Error receiving chunk from stream: {e}") + + try: + return self.chunk_parser(chunk=chunk) + except StopIteration: + raise StopIteration + except ValueError as e: + raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}") + + # Async iterator + def __aiter__(self): + self.async_response_iterator = self.streaming_response.__aiter__() # type: ignore + return self + + async def __anext__(self) -> GenericStreamingChunk: + try: + chunk = await self.async_response_iterator.__anext__() + except StopAsyncIteration: + raise StopAsyncIteration + except ValueError as e: + raise RuntimeError(f"Error receiving chunk from stream: {e}") + + try: + return self.chunk_parser(chunk=chunk) + except StopIteration: + raise StopIteration + except ValueError as e: + raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}") class MyCustomLLM(CustomLLM): @@ -34,8 +101,6 @@ class MyCustomLLM(CustomLLM): mock_response="Hi!", ) # type: ignore - -class MyCustomAsyncLLM(CustomLLM): async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: return litellm.completion( model="gpt-3.5-turbo", @@ -43,8 +108,27 @@ class MyCustomAsyncLLM(CustomLLM): 
mock_response="Hi!", ) # type: ignore + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: + generic_streaming_chunk: GenericStreamingChunk = { + "finish_reason": "stop", + "index": 0, + "is_finished": True, + "text": "Hello world", + "tool_use": None, + "usage": {"completion_tokens": 10, "prompt_tokens": 20, "total_tokens": 30}, + } + + completion_stream = ModelResponseIterator( + model_response=generic_streaming_chunk # type: ignore + ) + custom_iterator = CustomModelResponseIterator( + streaming_response=completion_stream + ) + return custom_iterator + def test_get_llm_provider(): + """""" from litellm.utils import custom_llm_setup my_custom_llm = MyCustomLLM() @@ -74,7 +158,7 @@ def test_simple_completion(): @pytest.mark.asyncio async def test_simple_acompletion(): - my_custom_llm = MyCustomAsyncLLM() + my_custom_llm = MyCustomLLM() litellm.custom_provider_map = [ {"provider": "custom_llm", "custom_handler": my_custom_llm} ] @@ -84,3 +168,22 @@ async def test_simple_acompletion(): ) assert resp.choices[0].message.content == "Hi!" + + +def test_simple_completion_streaming(): + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + resp = completion( + model="custom_llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + stream=True, + ) + + for chunk in resp: + print(chunk) + if chunk.choices[0].finish_reason is None: + assert isinstance(chunk.choices[0].delta.content, str) + else: + assert chunk.choices[0].finish_reason == "stop" diff --git a/litellm/utils.py b/litellm/utils.py index 0f1b0315d..c14ab36dd 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9262,7 +9262,10 @@ class CustomStreamWrapper: try: # return this for all models completion_obj = {"content": ""} - if self.custom_llm_provider and self.custom_llm_provider == "anthropic": + if self.custom_llm_provider and ( + self.custom_llm_provider == "anthropic" + or self.custom_llm_provider in litellm._custom_providers + ): from litellm.types.utils import GenericStreamingChunk as GChunk if self.received_finish_reason is not None: @@ -10981,3 +10984,8 @@ class ModelResponseIterator: raise StopAsyncIteration self.is_done = True return self.model_response + + +class CustomModelResponseIterator(Iterable): + def __init__(self) -> None: + super().__init__() From 060249c7e0477fee7740a856b4bb7d58ba3c8079 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 17:11:57 -0700 Subject: [PATCH 030/655] feat(utils.py): support async streaming for custom llm provider --- litellm/llms/custom_llm.py | 2 ++ litellm/tests/test_custom_llm.py | 36 ++++++++++++++++++++++++++++++-- litellm/utils.py | 2 ++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py index f00d02ab7..f1b2b28b4 100644 --- a/litellm/llms/custom_llm.py +++ b/litellm/llms/custom_llm.py @@ -17,8 +17,10 @@ from enum import Enum from functools import partial from typing import ( Any, + AsyncGenerator, AsyncIterator, Callable, + Coroutine, Iterator, List, Literal, diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py index 4cc355e4b..af88b1f3a 100644 --- a/litellm/tests/test_custom_llm.py +++ b/litellm/tests/test_custom_llm.py @@ -17,7 +17,7 @@ sys.path.insert( import os from collections import defaultdict from concurrent.futures import ThreadPoolExecutor -from typing import Any, AsyncIterator, Iterator, Union +from typing import Any, AsyncGenerator, 
AsyncIterator, Coroutine, Iterator, Union from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -75,7 +75,7 @@ class CustomModelResponseIterator: # Async iterator def __aiter__(self): self.async_response_iterator = self.streaming_response.__aiter__() # type: ignore - return self + return self.streaming_response async def __anext__(self) -> GenericStreamingChunk: try: @@ -126,6 +126,18 @@ class MyCustomLLM(CustomLLM): ) return custom_iterator + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: # type: ignore + generic_streaming_chunk: GenericStreamingChunk = { + "finish_reason": "stop", + "index": 0, + "is_finished": True, + "text": "Hello world", + "tool_use": None, + "usage": {"completion_tokens": 10, "prompt_tokens": 20, "total_tokens": 30}, + } + + yield generic_streaming_chunk # type: ignore + def test_get_llm_provider(): """""" @@ -187,3 +199,23 @@ def test_simple_completion_streaming(): assert isinstance(chunk.choices[0].delta.content, str) else: assert chunk.choices[0].finish_reason == "stop" + + +@pytest.mark.asyncio +async def test_simple_completion_async_streaming(): + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + resp = await litellm.acompletion( + model="custom_llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + stream=True, + ) + + async for chunk in resp: + print(chunk) + if chunk.choices[0].finish_reason is None: + assert isinstance(chunk.choices[0].delta.content, str) + else: + assert chunk.choices[0].finish_reason == "stop" diff --git a/litellm/utils.py b/litellm/utils.py index c14ab36dd..9158afb74 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -10132,6 +10132,7 @@ class CustomStreamWrapper: try: if self.completion_stream is None: await self.fetch_stream() + if ( self.custom_llm_provider == "openai" or self.custom_llm_provider == "azure" @@ -10156,6 +10157,7 @@ class CustomStreamWrapper: or self.custom_llm_provider == "triton" or self.custom_llm_provider == "watsonx" or self.custom_llm_provider in litellm.openai_compatible_endpoints + or self.custom_llm_provider in litellm._custom_providers ): async for chunk in self.completion_stream: print_verbose(f"value of async chunk: {chunk}") From e3142b4294cfd5b0b5219607f99d1b554a2a11ff Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 17:22:57 -0700 Subject: [PATCH 031/655] fix whisper health check with litellm --- litellm/llms/openai.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 25e2e518c..2c7a7a4df 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -1,5 +1,6 @@ import hashlib import json +import os import time import traceback import types @@ -1870,6 +1871,16 @@ class OpenAIChatCompletion(BaseLLM): model=model, # type: ignore prompt=prompt, # type: ignore ) + elif mode == "audio_transcription": + # Get the current directory of the file being run + pwd = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(pwd, "../tests/gettysburg.wav") + audio_file = open(file_path, "rb") + completion = await client.audio.transcriptions.with_raw_response.create( + file=audio_file, + model=model, # type: ignore + prompt=prompt, # type: ignore + ) else: raise Exception("mode not set") response = {} From 2432c90515229da4d80d9ec298c315e7c9040a57 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 17:26:14 -0700 Subject: [PATCH 032/655] feat 
- support health check audio_speech --- litellm/llms/openai.py | 9 ++++++++- litellm/proxy/proxy_config.yaml | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 2c7a7a4df..fae8a448a 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -1881,8 +1881,15 @@ class OpenAIChatCompletion(BaseLLM): model=model, # type: ignore prompt=prompt, # type: ignore ) + elif mode == "audio_speech": + # Get the current directory of the file being run + completion = await client.audio.speech.with_raw_response.create( + model=model, # type: ignore + input=prompt, # type: ignore + voice="alloy", + ) else: - raise Exception("mode not set") + raise ValueError("mode not set, passed in mode: " + mode) response = {} if completion is None or not hasattr(completion, "headers"): diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 0e3f0826e..bd8f5bfd0 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -8,6 +8,12 @@ model_list: litellm_params: model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct api_key: "os.environ/FIREWORKS" + - model_name: tts + litellm_params: + model: openai/tts-1 + api_key: "os.environ/OPENAI_API_KEY" + model_info: + mode: audio_speech general_settings: master_key: sk-1234 alerting: ["slack"] From 3573b47098c52b1dc506e8918b46f5ee471bca28 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 17:29:28 -0700 Subject: [PATCH 033/655] docs add example on using text to speech models --- docs/my-website/docs/proxy/health.md | 57 +++++++++++++++++----------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md index 6d383fc41..632702b91 100644 --- a/docs/my-website/docs/proxy/health.md +++ b/docs/my-website/docs/proxy/health.md @@ -41,28 +41,6 @@ litellm --health } ``` -### Background Health Checks - -You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`. - -Here's how to use it: -1. in the config.yaml add: -``` -general_settings: - background_health_checks: True # enable background health checks - health_check_interval: 300 # frequency of background health checks -``` - -2. Start server -``` -$ litellm /path/to/config.yaml -``` - -3. Query health endpoint: -``` -curl --location 'http://0.0.0.0:4000/health' -``` - ### Embedding Models We need some way to know if the model is an embedding model when running checks, if you have this in your config, specifying mode it makes an embedding health check @@ -124,6 +102,41 @@ model_list: mode: audio_transcription ``` + +### Text to Speech Models + +```yaml +# OpenAI Text to Speech Models + - model_name: tts + litellm_params: + model: openai/tts-1 + api_key: "os.environ/OPENAI_API_KEY" + model_info: + mode: audio_speech +``` + +## Background Health Checks + +You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`. + +Here's how to use it: +1. in the config.yaml add: +``` +general_settings: + background_health_checks: True # enable background health checks + health_check_interval: 300 # frequency of background health checks +``` + +2. Start server +``` +$ litellm /path/to/config.yaml +``` + +3. 
Query health endpoint: +``` +curl --location 'http://0.0.0.0:4000/health' +``` + ### Hide details The health check response contains details like endpoint URLs, error messages, From f2443996d82d50e88ecfbca4efb045fc0522aa84 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 17:30:15 -0700 Subject: [PATCH 034/655] feat support audio health checks for azure --- litellm/llms/azure.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py index a2928cf20..ec143f3fe 100644 --- a/litellm/llms/azure.py +++ b/litellm/llms/azure.py @@ -1864,6 +1864,23 @@ class AzureChatCompletion(BaseLLM): model=model, # type: ignore prompt=prompt, # type: ignore ) + elif mode == "audio_transcription": + # Get the current directory of the file being run + pwd = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(pwd, "../tests/gettysburg.wav") + audio_file = open(file_path, "rb") + completion = await client.audio.transcriptions.with_raw_response.create( + file=audio_file, + model=model, # type: ignore + prompt=prompt, # type: ignore + ) + elif mode == "audio_speech": + # Get the current directory of the file being run + completion = await client.audio.speech.with_raw_response.create( + model=model, # type: ignore + input=prompt, # type: ignore + voice="alloy", + ) else: raise Exception("mode not set") response = {} From 3814170ae17d748110058a0c411ad7eccc786b6a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 17:41:16 -0700 Subject: [PATCH 035/655] docs - add info about routing strategy on load balancing docs --- docs/my-website/docs/proxy/reliability.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md index 2404c744c..a3f03b3d7 100644 --- a/docs/my-website/docs/proxy/reliability.md +++ b/docs/my-website/docs/proxy/reliability.md @@ -31,8 +31,19 @@ model_list: api_base: https://openai-france-1234.openai.azure.com/ api_key: rpm: 1440 +routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" + model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo` + num_retries: 2 + timeout: 30 # 30 seconds + redis_host: # set this when using multiple litellm proxy deployments, load balancing state stored in redis + redis_password: + redis_port: 1992 ``` +:::info +Detailed information about [routing strategies can be found here](../routing) +::: + #### Step 2: Start Proxy with config ```shell From a2d07cfe64e24f2a42612213f46e49114a94ff8e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 17:41:19 -0700 Subject: [PATCH 036/655] docs(custom_llm_server.md): add calling custom llm server to docs --- .../docs/providers/custom_llm_server.md | 73 ++++++++++ .../docs/providers/custom_openai_proxy.md | 129 ------------------ docs/my-website/sidebars.js | 3 +- 3 files changed, 75 insertions(+), 130 deletions(-) create mode 100644 docs/my-website/docs/providers/custom_llm_server.md delete mode 100644 docs/my-website/docs/providers/custom_openai_proxy.md diff --git a/docs/my-website/docs/providers/custom_llm_server.md b/docs/my-website/docs/providers/custom_llm_server.md new file mode 100644 index 000000000..f8d5fb551 --- /dev/null +++ b/docs/my-website/docs/providers/custom_llm_server.md @@ -0,0 +1,73 @@ +# Custom API Server (Custom Format) + +LiteLLM allows you to 
call your custom endpoint in the OpenAI ChatCompletion format + + +:::info + +For calling an openai-compatible endpoint, [go here](./openai_compatible.md) +::: + +## Quick Start + +```python +import litellm +from litellm import CustomLLM, completion, get_llm_provider + + +class MyCustomLLM(CustomLLM): + def completion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + +litellm.custom_provider_map = [ # 👈 KEY STEP - REGISTER HANDLER + {"provider": "my-custom-llm", "custom_handler": my_custom_llm} + ] + +resp = completion( + model="my-custom-llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + ) + +assert resp.choices[0].message.content == "Hi!" +``` + + +## Custom Handler Spec + +```python +from litellm.types.utils import GenericStreamingChunk, ModelResponse +from typing import Iterator, AsyncIterator +from litellm.llms.base import BaseLLM + +class CustomLLMError(Exception): # use this for all your exceptions + def __init__( + self, + status_code, + message, + ): + self.status_code = status_code + self.message = message + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs + +class CustomLLM(BaseLLM): + def __init__(self) -> None: + super().__init__() + + def completion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def acompletion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: + raise CustomLLMError(status_code=500, message="Not implemented yet!") +``` \ No newline at end of file diff --git a/docs/my-website/docs/providers/custom_openai_proxy.md b/docs/my-website/docs/providers/custom_openai_proxy.md deleted file mode 100644 index b6f2eccac..000000000 --- a/docs/my-website/docs/providers/custom_openai_proxy.md +++ /dev/null @@ -1,129 +0,0 @@ -# Custom API Server (OpenAI Format) - -LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format - -## API KEYS -No api keys required - -## Set up your Custom API Server -Your server should have the following Endpoints: - -Here's an example OpenAI proxy server with routes: https://replit.com/@BerriAI/openai-proxy#main.py - -### Required Endpoints -- POST `/chat/completions` - chat completions endpoint - -### Optional Endpoints -- POST `/completions` - completions endpoint -- Get `/models` - available models on server -- POST `/embeddings` - creates an embedding vector representing the input text. 
- - -## Example Usage - -### Call `/chat/completions` -In order to use your custom OpenAI Chat Completion proxy with LiteLLM, ensure you set - -* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co" -* `custom_llm_provider` to `openai` this ensures litellm uses the `openai.ChatCompletion` to your api_base - -```python -import os -from litellm import completion - -## set ENV variables -os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy - -messages = [{ "content": "Hello, how are you?","role": "user"}] - -response = completion( - model="command-nightly", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://openai-proxy.berriai.repl.co", - custom_llm_provider="openai" # litellm will use the openai.ChatCompletion to make the request - -) -print(response) -``` - -#### Response -```json -{ - "object": - "chat.completion", - "choices": [{ - "finish_reason": "stop", - "index": 0, - "message": { - "content": - "The sky, a canvas of blue,\nA work of art, pure and true,\nA", - "role": "assistant" - } - }], - "id": - "chatcmpl-7fbd6077-de10-4cb4-a8a4-3ef11a98b7c8", - "created": - 1699290237.408061, - "model": - "togethercomputer/llama-2-70b-chat", - "usage": { - "completion_tokens": 18, - "prompt_tokens": 14, - "total_tokens": 32 - } - } -``` - - -### Call `/completions` -In order to use your custom OpenAI Completion proxy with LiteLLM, ensure you set - -* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co" -* `custom_llm_provider` to `text-completion-openai` this ensures litellm uses the `openai.Completion` to your api_base - -```python -import os -from litellm import completion - -## set ENV variables -os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy - -messages = [{ "content": "Hello, how are you?","role": "user"}] - -response = completion( - model="command-nightly", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://openai-proxy.berriai.repl.co", - custom_llm_provider="text-completion-openai" # litellm will use the openai.Completion to make the request - -) -print(response) -``` - -#### Response -```json -{ - "warning": - "This model version is deprecated. Migrate before January 4, 2024 to avoid disruption of service. 
Learn more https://platform.openai.com/docs/deprecations", - "id": - "cmpl-8HxHqF5dymQdALmLplS0dWKZVFe3r", - "object": - "text_completion", - "created": - 1699290166, - "model": - "text-davinci-003", - "choices": [{ - "text": - "\n\nThe weather in San Francisco varies depending on what time of year and time", - "index": 0, - "logprobs": None, - "finish_reason": "length" - }], - "usage": { - "prompt_tokens": 7, - "completion_tokens": 16, - "total_tokens": 23 - } - } -``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index d228e09d2..c1ce83068 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -175,7 +175,8 @@ const sidebars = { "providers/aleph_alpha", "providers/baseten", "providers/openrouter", - "providers/custom_openai_proxy", + // "providers/custom_openai_proxy", + "providers/custom_llm_server", "providers/petals", ], From bd7af04a725e74290aeb0d87889538041aa0cc3a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 17:56:34 -0700 Subject: [PATCH 037/655] feat(proxy_server.py): support custom llm handler on proxy --- .../docs/providers/custom_llm_server.md | 97 ++++++++++++++++++- litellm/proxy/_new_secret_config.yaml | 9 +- litellm/proxy/custom_handler.py | 21 ++++ litellm/proxy/proxy_server.py | 15 +++ 4 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 litellm/proxy/custom_handler.py diff --git a/docs/my-website/docs/providers/custom_llm_server.md b/docs/my-website/docs/providers/custom_llm_server.md index f8d5fb551..70fc4cea5 100644 --- a/docs/my-website/docs/providers/custom_llm_server.md +++ b/docs/my-website/docs/providers/custom_llm_server.md @@ -35,6 +35,101 @@ resp = completion( assert resp.choices[0].message.content == "Hi!" ``` +## OpenAI Proxy Usage + +1. Setup your `custom_handler.py` file + +```python +import litellm +from litellm import CustomLLM, completion, get_llm_provider + + +class MyCustomLLM(CustomLLM): + def completion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + +my_custom_llm = MyCustomLLM() +``` + +2. Add to `config.yaml` + +In the config below, we pass + +python_filename: `custom_handler.py` +custom_handler_instance_name: `my_custom_llm`. This is defined in Step 1 + +custom_handler: `custom_handler.my_custom_llm` + +```yaml +model_list: + - model_name: "test-model" + litellm_params: + model: "openai/text-embedding-ada-002" + - model_name: "my-custom-model" + litellm_params: + model: "my-custom-llm/my-model" + +litellm_settings: + custom_provider_map: + - {"provider": "my-custom-llm", "custom_handler": custom_handler.my_custom_llm} +``` + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! 
+ +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "my-custom-model", + "messages": [{"role": "user", "content": "Say \"this is a test\" in JSON!"}], +}' +``` + +Expected Response + +``` +{ + "id": "chatcmpl-06f1b9cd-08bc-43f7-9814-a69173921216", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "Hi!", + "role": "assistant", + "tool_calls": null, + "function_call": null + } + } + ], + "created": 1721955063, + "model": "gpt-3.5-turbo", + "object": "chat.completion", + "system_fingerprint": null, + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30 + } +} +``` ## Custom Handler Spec @@ -70,4 +165,4 @@ class CustomLLM(BaseLLM): async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: raise CustomLLMError(status_code=500, message="Not implemented yet!") -``` \ No newline at end of file +``` diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index a81d133e5..0854f0901 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,4 +1,11 @@ model_list: - model_name: "test-model" litellm_params: - model: "openai/text-embedding-ada-002" \ No newline at end of file + model: "openai/text-embedding-ada-002" + - model_name: "my-custom-model" + litellm_params: + model: "my-custom-llm/my-model" + +litellm_settings: + custom_provider_map: + - {"provider": "my-custom-llm", "custom_handler": custom_handler.my_custom_llm} \ No newline at end of file diff --git a/litellm/proxy/custom_handler.py b/litellm/proxy/custom_handler.py new file mode 100644 index 000000000..56943c34d --- /dev/null +++ b/litellm/proxy/custom_handler.py @@ -0,0 +1,21 @@ +import litellm +from litellm import CustomLLM, completion, get_llm_provider + + +class MyCustomLLM(CustomLLM): + def completion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + +my_custom_llm = MyCustomLLM() diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f22f25f73..bad1abae2 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1507,6 +1507,21 @@ class ProxyConfig: verbose_proxy_logger.debug( f"litellm.post_call_rules: {litellm.post_call_rules}" ) + elif key == "custom_provider_map": + from litellm.utils import custom_llm_setup + + litellm.custom_provider_map = [ + { + "provider": item["provider"], + "custom_handler": get_instance_fn( + value=item["custom_handler"], + config_file_path=config_file_path, + ), + } + for item in value + ] + + custom_llm_setup() elif key == "success_callback": litellm.success_callback = [] From e67daf79be891448a278001b8d2637e1ed345af0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 18:22:35 -0700 Subject: [PATCH 038/655] router support setting pass_through_all_models --- litellm/types/router.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/types/router.py b/litellm/types/router.py index 78dfbc4c1..285732121 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -540,3 
+540,6 @@ class RouterGeneralSettings(BaseModel): async_only_mode: bool = Field( default=False ) # this will only initialize async clients. Good for memory utils + pass_through_all_models: bool = Field( + default=False + ) # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding From 8f4c5437b8bf7a1fa4501c305e68ace752ab73ea Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 18:34:12 -0700 Subject: [PATCH 039/655] router support setting pass_through_all_models --- litellm/proxy/proxy_server.py | 15 +++++++++++++++ litellm/router.py | 14 ++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f22f25f73..022bb3040 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2885,6 +2885,11 @@ async def chat_completion( and llm_router.default_deployment is not None ): # model in router deployments, calling a specific deployment on the router tasks.append(llm_router.acompletion(**data)) + elif ( + llm_router is not None + and llm_router.router_general_settings.pass_through_all_models is True + ): + tasks.append(litellm.acompletion(**data)) elif user_model is not None: # `litellm --model ` tasks.append(litellm.acompletion(**data)) else: @@ -3147,6 +3152,11 @@ async def completion( llm_response = asyncio.create_task(llm_router.atext_completion(**data)) elif user_model is not None: # `litellm --model ` llm_response = asyncio.create_task(litellm.atext_completion(**data)) + elif ( + llm_router is not None + and llm_router.router_general_settings.pass_through_all_models is True + ): + llm_response = asyncio.create_task(litellm.atext_completion(**data)) else: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -3405,6 +3415,11 @@ async def embeddings( and llm_router.default_deployment is not None ): # model in router deployments, calling a specific deployment on the router tasks.append(llm_router.aembedding(**data)) + elif ( + llm_router is not None + and llm_router.router_general_settings.pass_through_all_models is True + ): + tasks.append(litellm.aembedding(**data)) elif user_model is not None: # `litellm --model ` tasks.append(litellm.aembedding(**data)) else: diff --git a/litellm/router.py b/litellm/router.py index 53013a759..d1198aa15 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -174,7 +174,9 @@ class Router: routing_strategy_args: dict = {}, # just for latency-based routing semaphore: Optional[asyncio.Semaphore] = None, alerting_config: Optional[AlertingConfig] = None, - router_general_settings: Optional[RouterGeneralSettings] = None, + router_general_settings: Optional[ + RouterGeneralSettings + ] = RouterGeneralSettings(), ) -> None: """ Initialize the Router class with the given parameters for caching, reliability, and routing strategy. 
@@ -253,8 +255,8 @@ class Router: verbose_router_logger.setLevel(logging.INFO) elif debug_level == "DEBUG": verbose_router_logger.setLevel(logging.DEBUG) - self.router_general_settings: Optional[RouterGeneralSettings] = ( - router_general_settings + self.router_general_settings: RouterGeneralSettings = ( + router_general_settings or RouterGeneralSettings() ) self.assistants_config = assistants_config @@ -3554,7 +3556,11 @@ class Router: # Check if user is trying to use model_name == "*" # this is a catch all model for their specific api key if deployment.model_name == "*": - self.default_deployment = deployment.to_json(exclude_none=True) + if deployment.litellm_params.model == "*": + # user wants to pass through all requests to litellm.acompletion for unknown deployments + self.router_general_settings.pass_through_all_models = True + else: + self.default_deployment = deployment.to_json(exclude_none=True) # Azure GPT-Vision Enhancements, users can pass os.environ/ data_sources = deployment.litellm_params.get("dataSources", []) or [] From 35203cede7c87433ef14ebd4f0ae9c14da363320 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 18:40:04 -0700 Subject: [PATCH 040/655] add ANTHROPIC_API_KEY on build and test --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index e3593e815..a9a5be671 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -208,6 +208,7 @@ jobs: -e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \ -e MISTRAL_API_KEY=$MISTRAL_API_KEY \ -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_REGION_NAME=$AWS_REGION_NAME \ -e AUTO_INFER_REGION=True \ From a9a946a6602a2c0cb3abcb99918a471f3db5cd4a Mon Sep 17 00:00:00 2001 From: yujonglee Date: Thu, 25 Jul 2024 22:36:10 +0900 Subject: [PATCH 041/655] install canary (default UI) --- docs/my-website/docusaurus.config.js | 9 + docs/my-website/package.json | 3 +- docs/my-website/yarn.lock | 407 +++++++++++++-------------- 3 files changed, 203 insertions(+), 216 deletions(-) diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index a279e56d8..4fb1c51c2 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -28,6 +28,15 @@ const config = { }, plugins: [ + [ + require.resolve("@getcanary/docusaurus-pagefind"), + { + styles: { + "--canary-color-primary-c": 0.1, + "--canary-color-primary-h": 270, + }, + }, + ], [ '@docusaurus/plugin-ideal-image', { diff --git a/docs/my-website/package.json b/docs/my-website/package.json index c3dc673f9..9b955a7ad 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -18,10 +18,11 @@ "@docusaurus/plugin-google-gtag": "^2.4.1", "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", + "@getcanary/docusaurus-pagefind": "^0.0.7", + "@getcanary/web": "^0.0.46", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", - "docusaurus-lunr-search": "^2.4.1", "prism-react-renderer": "^1.3.5", "react": "^18.1.0", "react-dom": "^18.1.0", diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index d5159fbe9..28fe315c6 100644 --- a/docs/my-website/yarn.lock +++ b/docs/my-website/yarn.lock @@ -1722,7 +1722,7 @@ "@docusaurus/theme-search-algolia" "2.4.1" "@docusaurus/types" "2.4.1" -"@docusaurus/react-loadable@5.5.2": +"@docusaurus/react-loadable@5.5.2", 
"react-loadable@npm:@docusaurus/react-loadable@5.5.2": version "5.5.2" resolved "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz" integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== @@ -1941,6 +1941,49 @@ resolved "https://registry.npmjs.org/@endiliey/react-ideal-image/-/react-ideal-image-0.0.11.tgz" integrity sha512-QxMjt/Gvur/gLxSoCy7VIyGGGrGmDN+VHcXkN3R2ApoWX0EYUE+hMgPHSW/PV6VVebZ1Nd4t2UnGRBDihu16JQ== +"@floating-ui/core@^1.6.0": + version "1.6.5" + resolved "https://registry.yarnpkg.com/@floating-ui/core/-/core-1.6.5.tgz#102335cac0d22035b04d70ca5ff092d2d1a26f2b" + integrity sha512-8GrTWmoFhm5BsMZOTHeGD2/0FLKLQQHvO/ZmQga4tKempYRLz8aqJGqXVuQgisnMObq2YZ2SgkwctN1LOOxcqA== + dependencies: + "@floating-ui/utils" "^0.2.5" + +"@floating-ui/dom@^1.6.8": + version "1.6.8" + resolved "https://registry.yarnpkg.com/@floating-ui/dom/-/dom-1.6.8.tgz#45e20532b6d8a061b356a4fb336022cf2609754d" + integrity sha512-kx62rP19VZ767Q653wsP1XZCGIirkE09E0QUGNYTM/ttbbQHqcGPdSfWFxUyyNLc/W6aoJRBajOSXhP6GXjC0Q== + dependencies: + "@floating-ui/core" "^1.6.0" + "@floating-ui/utils" "^0.2.5" + +"@floating-ui/utils@^0.2.5": + version "0.2.5" + resolved "https://registry.yarnpkg.com/@floating-ui/utils/-/utils-0.2.5.tgz#105c37d9d9620ce69b7f692a20c821bf1ad2cbf9" + integrity sha512-sTcG+QZ6fdEUObICavU+aB3Mp8HY4n14wYHdxK4fXjPmv3PXZZeY5RaguJmGyeH/CJQhX3fqKUtS4qc1LoHwhQ== + +"@getcanary/docusaurus-pagefind@^0.0.7": + version "0.0.7" + resolved "https://registry.yarnpkg.com/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.7.tgz#8a086891a456f5e333d43216bd5cf991718de106" + integrity sha512-1VDAF3/xfigsR6Tj6sQKt/OcCzETIIhhVSbhicBlxbhYY98j8jk3EyE9uxWgGg+lYz+Q+/Xmq+7tvsqBlKreeg== + dependencies: + cli-progress "^3.12.0" + micromatch "^4.0.7" + pagefind "^1.1.0" + +"@getcanary/web@^0.0.46": + version "0.0.46" + resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.46.tgz#98bdf0576665f03a2d3f645458c2a286f9243e3a" + integrity sha512-PdI/jiKzBX6OashQxWL+kBvHRL+0ciK9ei6OE6Poukz1/P+W4We6TXXPqsjbrWryPg2l6sammJY04clFyPIqiQ== + dependencies: + "@floating-ui/dom" "^1.6.8" + "@lit-labs/observers" "^2.0.2" + "@lit/context" "^1.1.2" + "@lit/task" "^1.0.1" + highlight.js "^11.10.0" + lit "^3.1.4" + marked "^13.0.2" + p-debounce "^4.0.0" + "@hapi/hoek@^9.0.0": version "9.3.0" resolved "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz" @@ -2017,6 +2060,39 @@ resolved "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.4.tgz" integrity sha512-Hcv+nVC0kZnQ3tD9GVu5xSMR4VVYOteQIr/hwFPVEvPdlXqgGEuRjiheChHgdM+JyqdgNcmzZOX/tnl0JOiI7A== +"@lit-labs/observers@^2.0.2": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@lit-labs/observers/-/observers-2.0.2.tgz#3f655a86e3dccc3a174f4f0149e8b318beb72025" + integrity sha512-eZb5+W9Cb0e/Y5m1DNxBSGTvGB2TAVTGMnTxL/IzFhPQEcZIAHewW1eVBhN8W07A5tirRaAmmF6fGL1V20p3gQ== + dependencies: + "@lit/reactive-element" "^1.0.0 || ^2.0.0" + +"@lit-labs/ssr-dom-shim@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@lit-labs/ssr-dom-shim/-/ssr-dom-shim-1.2.0.tgz#353ce4a76c83fadec272ea5674ede767650762fd" + integrity sha512-yWJKmpGE6lUURKAaIltoPIE/wrbY3TEkqQt+X0m+7fQNnAv0keydnYvbiJFP1PnMhizmIWRWOG5KLhYyc/xl+g== + +"@lit/context@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@lit/context/-/context-1.1.2.tgz#c67b37352117eb252143aa9763f75f7bfa284f88" + integrity sha512-S0nw2C6Tkm7fVX5TGYqeROGD+Z9Coa2iFpW+ysYBDH3YvCqOY3wVQvSgwbaliLJkjTnSEYCBe9qFqKV8WUFpVw== + 
dependencies: + "@lit/reactive-element" "^1.6.2 || ^2.0.0" + +"@lit/reactive-element@^1.0.0 || ^2.0.0", "@lit/reactive-element@^1.6.2 || ^2.0.0", "@lit/reactive-element@^2.0.4": + version "2.0.4" + resolved "https://registry.yarnpkg.com/@lit/reactive-element/-/reactive-element-2.0.4.tgz#8f2ed950a848016383894a26180ff06c56ae001b" + integrity sha512-GFn91inaUa2oHLak8awSIigYz0cU0Payr1rcFsrkf5OJ5eSPxElyZfKh0f2p9FsTiZWXQdWGJeXZICEfXXYSXQ== + dependencies: + "@lit-labs/ssr-dom-shim" "^1.2.0" + +"@lit/task@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@lit/task/-/task-1.0.1.tgz#7462aeaa973766822567f5ca90fe157404e8eb81" + integrity sha512-fVLDtmwCau8NywnFIXaJxsCZjzaIxnVq+cFRKYC1Y4tA4/0rMTvF6DLZZ2JE51BwzOluaKtgJX8x1QDsQtAaIw== + dependencies: + "@lit/reactive-element" "^1.0.0 || ^2.0.0" + "@mdx-js/mdx@^1.6.22": version "1.6.22" resolved "https://registry.npmjs.org/@mdx-js/mdx/-/mdx-1.6.22.tgz" @@ -2086,6 +2162,31 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" +"@pagefind/darwin-arm64@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@pagefind/darwin-arm64/-/darwin-arm64-1.1.0.tgz#d1b9bcfda0bb099d15b8cc5fcd30e9a1ada8e649" + integrity sha512-SLsXNLtSilGZjvqis8sX42fBWsWAVkcDh1oerxwqbac84HbiwxpxOC2jm8hRwcR0Z55HPZPWO77XeRix/8GwTg== + +"@pagefind/darwin-x64@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@pagefind/darwin-x64/-/darwin-x64-1.1.0.tgz#182b5d86899b65beb56ae96c828f32c71a5f89bb" + integrity sha512-QjQSE/L5oS1C8N8GdljGaWtjCBMgMtfrPAoiCmINTu9Y9dp0ggAyXvF8K7Qg3VyIMYJ6v8vg2PN7Z3b+AaAqUA== + +"@pagefind/linux-arm64@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@pagefind/linux-arm64/-/linux-arm64-1.1.0.tgz#46e8af93106aa202efeae47510e2abcfa3182fa5" + integrity sha512-8zjYCa2BtNEL7KnXtysPtBELCyv5DSQ4yHeK/nsEq6w4ToAMTBl0K06khqxdSGgjMSwwrxvLzq3so0LC5Q14dA== + +"@pagefind/linux-x64@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@pagefind/linux-x64/-/linux-x64-1.1.0.tgz#6171ce1a6c0c31f8e3f962b9b81d96900ad2019a" + integrity sha512-4lsg6VB7A6PWTwaP8oSmXV4O9H0IHX7AlwTDcfyT+YJo/sPXOVjqycD5cdBgqNLfUk8B9bkWcTDCRmJbHrKeCw== + +"@pagefind/windows-x64@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@pagefind/windows-x64/-/windows-x64-1.1.0.tgz#92efa86baaea76a0268d8d4e692752426cc144b9" + integrity sha512-OboCM76BcMKT9IoSfZuFhiqMRgTde8x4qDDvKulFmycgiJrlL5WnIqBHJLQxZq+o2KyZpoHF97iwsGAm8c32sQ== + "@polka/url@^1.0.0-next.20": version "1.0.0-next.21" resolved "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz" @@ -2516,6 +2617,11 @@ dependencies: "@types/node" "*" +"@types/trusted-types@^2.0.2": + version "2.0.7" + resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.7.tgz#baccb07a970b91707df3a3e8ba6896c57ead2d11" + integrity sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw== + "@types/unist@^2", "@types/unist@^2.0.0", "@types/unist@^2.0.2", "@types/unist@^2.0.3": version "2.0.7" resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz" @@ -2671,11 +2777,6 @@ resolved "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz" integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ== -abbrev@1: - version "1.1.1" - resolved "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz" - integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q== - accepts@~1.3.4, accepts@~1.3.5, accepts@~1.3.8: version "1.3.8" resolved 
"https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz" @@ -2885,11 +2986,6 @@ anymatch@~3.1.2: normalize-path "^3.0.0" picomatch "^2.0.4" -"aproba@^1.0.3 || ^2.0.0": - version "2.0.0" - resolved "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz" - integrity sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ== - arch@^2.1.0: version "2.2.0" resolved "https://registry.npmjs.org/arch/-/arch-2.2.0.tgz" @@ -3083,13 +3179,6 @@ atob@^2.1.2: resolved "https://registry.npmjs.org/atob/-/atob-2.1.2.tgz" integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg== -autocomplete.js@^0.37.0: - version "0.37.1" - resolved "https://registry.npmjs.org/autocomplete.js/-/autocomplete.js-0.37.1.tgz" - integrity sha512-PgSe9fHYhZEsm/9jggbjtVsGXJkPLvd+9mC7gZJ662vVL5CRWEtm/mIrrzCx0MrNxHVwxD5d00UOn6NsmL2LUQ== - dependencies: - immediate "^3.2.3" - autolinker@^3.11.0: version "3.16.2" resolved "https://registry.npmjs.org/autolinker/-/autolinker-3.16.2.tgz" @@ -3255,11 +3344,6 @@ batch@0.6.1: resolved "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz" integrity sha512-x+VAiMRL6UPkx+kudNvxTl6hB2XNNCG2r+7wixVfIYwu/2HKRXimwQyaumLjMveWvT2Hkd/cAJw+QBMfJ/EKVw== -bcp-47-match@^1.0.0: - version "1.0.3" - resolved "https://registry.npmjs.org/bcp-47-match/-/bcp-47-match-1.0.3.tgz" - integrity sha512-LggQ4YTdjWQSKELZF5JwchnBa1u0pIQSZf5lSdOHEdbVP55h0qICA/FUp3+W99q0xqxYa1ZQizTUH87gecII5w== - bcrypt-pbkdf@^1.0.0: version "1.0.2" resolved "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz" @@ -3462,6 +3546,13 @@ braces@^3.0.2, braces@~3.0.2: dependencies: fill-range "^7.0.1" +braces@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789" + integrity sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA== + dependencies: + fill-range "^7.1.1" + browserslist@4.14.2, browserslist@^4.12.0: version "4.14.2" resolved "https://registry.npmjs.org/browserslist/-/browserslist-4.14.2.tgz" @@ -3865,6 +3956,13 @@ cli-boxes@^3.0.0: resolved "https://registry.npmjs.org/cli-boxes/-/cli-boxes-3.0.0.tgz" integrity sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g== +cli-progress@^3.12.0: + version "3.12.0" + resolved "https://registry.yarnpkg.com/cli-progress/-/cli-progress-3.12.0.tgz#807ee14b66bcc086258e444ad0f19e7d42577942" + integrity sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A== + dependencies: + string-width "^4.2.3" + cli-table3@^0.6.2: version "0.6.3" resolved "https://registry.npmjs.org/cli-table3/-/cli-table3-0.6.3.tgz" @@ -3961,11 +4059,6 @@ color-string@^1.6.0, color-string@^1.9.0: color-name "^1.0.0" simple-swizzle "^0.2.2" -color-support@^1.1.2: - version "1.1.3" - resolved "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz" - integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg== - color@^3.0.0: version "3.2.1" resolved "https://registry.npmjs.org/color/-/color-3.2.1.tgz" @@ -4116,11 +4209,6 @@ consola@^2.15.3: resolved "https://registry.npmjs.org/consola/-/consola-2.15.3.tgz" integrity sha512-9vAdYbHj6x2fLKC4+oPH0kFzY/orMZyG2Aj+kNylHxKGJ/Ed4dpNyAQYwJOdqO4zdM7XpVHmyejQDcQHrnuXbw== -console-control-strings@^1.0.0: - version "1.1.0" - resolved "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz" - integrity 
sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ== - console-stream@^0.1.1: version "0.1.1" resolved "https://registry.npmjs.org/console-stream/-/console-stream-0.1.1.tgz" @@ -4410,11 +4498,6 @@ css-select@~1.2.0: domutils "1.5.1" nth-check "~1.0.1" -css-selector-parser@^1.0.0: - version "1.4.1" - resolved "https://registry.npmjs.org/css-selector-parser/-/css-selector-parser-1.4.1.tgz" - integrity sha512-HYPSb7y/Z7BNDCOrakL4raGO2zltZkbeXyAd6Tg9obzix6QhzxCotdBl6VT0Dv4vZfJGVz3WL/xaEI9Ly3ul0g== - css-tree@1.0.0-alpha.37: version "1.0.0-alpha.37" resolved "https://registry.npmjs.org/css-tree/-/css-tree-1.0.0-alpha.37.tgz" @@ -4869,11 +4952,6 @@ dir-glob@^3.0.1: dependencies: path-type "^4.0.0" -direction@^1.0.0: - version "1.0.4" - resolved "https://registry.npmjs.org/direction/-/direction-1.0.4.tgz" - integrity sha512-GYqKi1aH7PJXxdhTeZBFrg8vUBeKXi+cNprXsC1kpJcbcVnV9wBsrOu1cQEdG0WeQwlfHiy3XvnKfIrJ2R0NzQ== - discontinuous-range@1.0.0: version "1.0.0" resolved "https://registry.npmjs.org/discontinuous-range/-/discontinuous-range-1.0.0.tgz" @@ -4891,26 +4969,6 @@ dns-packet@^5.2.2: dependencies: "@leichtgewicht/ip-codec" "^2.0.1" -docusaurus-lunr-search@^2.4.1: - version "2.4.1" - resolved "https://registry.npmjs.org/docusaurus-lunr-search/-/docusaurus-lunr-search-2.4.1.tgz" - integrity sha512-UOgaAypgO0iLyA1Hk4EThG/ofLm9/JldznzN98ZKr7TMYVjMZbAEaIBKLAUDFdfOPr9D5EswXdLn39/aRkwHMA== - dependencies: - autocomplete.js "^0.37.0" - clsx "^1.2.1" - gauge "^3.0.0" - hast-util-select "^4.0.0" - hast-util-to-text "^2.0.0" - hogan.js "^3.0.2" - lunr "^2.3.8" - lunr-languages "^1.4.0" - minimatch "^3.0.4" - object-assign "^4.1.1" - rehype-parse "^7.0.1" - to-vfile "^6.1.0" - unified "^9.0.0" - unist-util-is "^4.0.2" - docusaurus@^1.14.7: version "1.14.7" resolved "https://registry.npmjs.org/docusaurus/-/docusaurus-1.14.7.tgz" @@ -5859,6 +5917,13 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" +fill-range@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292" + integrity sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg== + dependencies: + to-regex-range "^5.0.1" + finalhandler@1.2.0: version "1.2.0" resolved "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz" @@ -6098,21 +6163,6 @@ functions-have-names@^1.2.3: resolved "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz" integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== -gauge@^3.0.0: - version "3.0.2" - resolved "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz" - integrity sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q== - dependencies: - aproba "^1.0.3 || ^2.0.0" - color-support "^1.1.2" - console-control-strings "^1.0.0" - has-unicode "^2.0.1" - object-assign "^4.1.1" - signal-exit "^3.0.0" - string-width "^4.2.3" - strip-ansi "^6.0.1" - wide-align "^1.1.2" - gaze@^1.1.3: version "1.1.3" resolved "https://registry.npmjs.org/gaze/-/gaze-1.1.3.tgz" @@ -6565,11 +6615,6 @@ has-tostringtag@^1.0.0: dependencies: has-symbols "^1.0.2" -has-unicode@^2.0.1: - version "2.0.1" - resolved "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz" - integrity sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ== - has-value@^0.3.1: version "0.3.1" resolved 
"https://registry.npmjs.org/has-value/-/has-value-0.3.1.tgz" @@ -6645,16 +6690,6 @@ hast-util-from-parse5@^6.0.0: vfile-location "^3.2.0" web-namespaces "^1.0.0" -hast-util-has-property@^1.0.0: - version "1.0.4" - resolved "https://registry.npmjs.org/hast-util-has-property/-/hast-util-has-property-1.0.4.tgz" - integrity sha512-ghHup2voGfgFoHMGnaLHOjbYFACKrRh9KFttdCzMCbFoBMJXiNi2+XTrPP8+q6cDJM/RSqlCfVWrjp1H201rZg== - -hast-util-is-element@^1.0.0: - version "1.1.0" - resolved "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-1.1.0.tgz" - integrity sha512-oUmNua0bFbdrD/ELDSSEadRVtWZOf3iF6Lbv81naqsIV99RnSCieTbWuWCY8BAeEfKJTKl0gRdokv+dELutHGQ== - hast-util-parse-selector@^2.0.0: version "2.2.5" resolved "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz" @@ -6676,26 +6711,6 @@ hast-util-raw@6.0.1: xtend "^4.0.0" zwitch "^1.0.0" -hast-util-select@^4.0.0: - version "4.0.2" - resolved "https://registry.npmjs.org/hast-util-select/-/hast-util-select-4.0.2.tgz" - integrity sha512-8EEG2//bN5rrzboPWD2HdS3ugLijNioS1pqOTIolXNf67xxShYw4SQEmVXd3imiBG+U2bC2nVTySr/iRAA7Cjg== - dependencies: - bcp-47-match "^1.0.0" - comma-separated-tokens "^1.0.0" - css-selector-parser "^1.0.0" - direction "^1.0.0" - hast-util-has-property "^1.0.0" - hast-util-is-element "^1.0.0" - hast-util-to-string "^1.0.0" - hast-util-whitespace "^1.0.0" - not "^0.1.0" - nth-check "^2.0.0" - property-information "^5.0.0" - space-separated-tokens "^1.0.0" - unist-util-visit "^2.0.0" - zwitch "^1.0.0" - hast-util-to-parse5@^6.0.0: version "6.0.0" resolved "https://registry.npmjs.org/hast-util-to-parse5/-/hast-util-to-parse5-6.0.0.tgz" @@ -6707,25 +6722,6 @@ hast-util-to-parse5@^6.0.0: xtend "^4.0.0" zwitch "^1.0.0" -hast-util-to-string@^1.0.0: - version "1.0.4" - resolved "https://registry.npmjs.org/hast-util-to-string/-/hast-util-to-string-1.0.4.tgz" - integrity sha512-eK0MxRX47AV2eZ+Lyr18DCpQgodvaS3fAQO2+b9Two9F5HEoRPhiUMNzoXArMJfZi2yieFzUBMRl3HNJ3Jus3w== - -hast-util-to-text@^2.0.0: - version "2.0.1" - resolved "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-2.0.1.tgz" - integrity sha512-8nsgCARfs6VkwH2jJU9b8LNTuR4700na+0h3PqCaEk4MAnMDeu5P0tP8mjk9LLNGxIeQRLbiDbZVw6rku+pYsQ== - dependencies: - hast-util-is-element "^1.0.0" - repeat-string "^1.0.0" - unist-util-find-after "^3.0.0" - -hast-util-whitespace@^1.0.0: - version "1.0.4" - resolved "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-1.0.4.tgz" - integrity sha512-I5GTdSfhYfAPNztx2xJRQpG8cuDSNt599/7YUn7Gx/WxNMsG+a835k97TDkFgk123cwjfwINaZknkKkphx/f2A== - hastscript@^6.0.0: version "6.0.0" resolved "https://registry.npmjs.org/hastscript/-/hastscript-6.0.0.tgz" @@ -6747,6 +6743,11 @@ hex-color-regex@^1.1.0: resolved "https://registry.npmjs.org/hex-color-regex/-/hex-color-regex-1.1.0.tgz" integrity sha512-l9sfDFsuqtOqKDsQdqrMRk0U85RZc0RtOR9yPI7mRVOa4FsR/BVnZ0shmQRM96Ji99kYZP/7hn1cedc1+ApsTQ== +highlight.js@^11.10.0: + version "11.10.0" + resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-11.10.0.tgz#6e3600dc4b33d6dc23d5bd94fbf72405f5892b92" + integrity sha512-SYVnVFswQER+zu1laSya563s+F8VDGt7o35d4utbamowvUNLLMovFqwCLSocpZTz3MgaSRA1IbqRWZv97dtErQ== + highlight.js@^9.16.2: version "9.18.5" resolved "https://registry.npmjs.org/highlight.js/-/highlight.js-9.18.5.tgz" @@ -6764,14 +6765,6 @@ history@^4.9.0: tiny-warning "^1.0.0" value-equal "^1.0.1" -hogan.js@^3.0.2: - version "3.0.2" - resolved "https://registry.npmjs.org/hogan.js/-/hogan.js-3.0.2.tgz" - integrity 
sha512-RqGs4wavGYJWE07t35JQccByczmNUXQT0E12ZYV1VKYu5UiAU9lsos/yBAcf840+zrUQQxgVduCR5/B8nNtibg== - dependencies: - mkdirp "0.3.0" - nopt "1.0.10" - hoist-non-react-statics@^3.1.0: version "3.3.2" resolved "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz" @@ -7039,11 +7032,6 @@ imagemin@^6.0.0: pify "^4.0.1" replace-ext "^1.0.0" -immediate@^3.2.3: - version "3.3.0" - resolved "https://registry.npmjs.org/immediate/-/immediate-3.3.0.tgz" - integrity sha512-HR7EVodfFUdQCTIeySw+WDRFJlPcLOJbXfwwZ7Oom6tjsvZ3bOkCDJHehQC3nxJrv7+f9XecwazynjU8e4Vw3Q== - immer@8.0.1: version "8.0.1" resolved "https://registry.npmjs.org/immer/-/immer-8.0.1.tgz" @@ -7921,6 +7909,31 @@ listenercount@~1.0.1: resolved "https://registry.npmjs.org/listenercount/-/listenercount-1.0.1.tgz" integrity sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ== +lit-element@^4.0.4: + version "4.0.6" + resolved "https://registry.yarnpkg.com/lit-element/-/lit-element-4.0.6.tgz#b9f5b5d68f30636be1314ec76c9a73a6405f04dc" + integrity sha512-U4sdJ3CSQip7sLGZ/uJskO5hGiqtlpxndsLr6mt3IQIjheg93UKYeGQjWMRql1s/cXNOaRrCzC2FQwjIwSUqkg== + dependencies: + "@lit-labs/ssr-dom-shim" "^1.2.0" + "@lit/reactive-element" "^2.0.4" + lit-html "^3.1.2" + +lit-html@^3.1.2: + version "3.1.4" + resolved "https://registry.yarnpkg.com/lit-html/-/lit-html-3.1.4.tgz#30ad4f11467a61e2f08856de170e343184e9034e" + integrity sha512-yKKO2uVv7zYFHlWMfZmqc+4hkmSbFp8jgjdZY9vvR9jr4J8fH6FUMXhr+ljfELgmjpvlF7Z1SJ5n5/Jeqtc9YA== + dependencies: + "@types/trusted-types" "^2.0.2" + +lit@^3.1.4: + version "3.1.4" + resolved "https://registry.yarnpkg.com/lit/-/lit-3.1.4.tgz#03a72e9f0b1f5da317bf49b1ab579a7132e73d7a" + integrity sha512-q6qKnKXHy2g1kjBaNfcoLlgbI3+aSOZ9Q4tiGa9bGYXq5RBXxkVTqTIVmP2VWMp29L4GyvCFm8ZQ2o56eUAMyA== + dependencies: + "@lit/reactive-element" "^2.0.4" + lit-element "^4.0.4" + lit-html "^3.1.2" + livereload-js@^2.3.0: version "2.4.0" resolved "https://registry.npmjs.org/livereload-js/-/livereload-js-2.4.0.tgz" @@ -8209,16 +8222,6 @@ lru-cache@^6.0.0: dependencies: yallist "^4.0.0" -lunr-languages@^1.4.0: - version "1.13.0" - resolved "https://registry.npmjs.org/lunr-languages/-/lunr-languages-1.13.0.tgz" - integrity sha512-qgTOarcnAtVFKr0aJ2GuiqbBdhKF61jpF8OgFbnlSAb1t6kOiQW67q0hv0UQzzB+5+OwPpnZyFT/L0L9SQG1/A== - -lunr@^2.3.8: - version "2.3.9" - resolved "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz" - integrity sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow== - make-dir@^1.0.0, make-dir@^1.2.0: version "1.3.0" resolved "https://registry.npmjs.org/make-dir/-/make-dir-1.3.0.tgz" @@ -8286,6 +8289,11 @@ markdown-toc@^1.2.0: repeat-string "^1.6.1" strip-color "^0.1.0" +marked@^13.0.2: + version "13.0.2" + resolved "https://registry.yarnpkg.com/marked/-/marked-13.0.2.tgz#d5d05bd2683a85cb9cc6afbe5240e3a8bffcb92a" + integrity sha512-J6CPjP8pS5sgrRqxVRvkCIkZ6MFdRIjDkwUwgJ9nL2fbmM6qGQeB2C16hi8Cc9BOzj6xXzy0jyi0iPIfnMHYzA== + math-random@^1.0.1: version "1.0.4" resolved "https://registry.npmjs.org/math-random/-/math-random-1.0.4.tgz" @@ -8419,6 +8427,14 @@ micromatch@^4.0.2, micromatch@^4.0.4, micromatch@^4.0.5: braces "^3.0.2" picomatch "^2.3.1" +micromatch@^4.0.7: + version "4.0.7" + resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.7.tgz#33e8190d9fe474a9895525f5618eee136d46c2e5" + integrity sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q== + dependencies: + braces "^3.0.3" + picomatch 
"^2.3.1" + mime-db@1.52.0, "mime-db@>= 1.43.0 < 2": version "1.52.0" resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz" @@ -8514,11 +8530,6 @@ mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3: resolved "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz" integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A== -mkdirp@0.3.0: - version "0.3.0" - resolved "https://registry.npmjs.org/mkdirp/-/mkdirp-0.3.0.tgz" - integrity sha512-OHsdUcVAQ6pOtg5JYWpCBo9W/GySVuwvP9hueRMW7UqshC0tbfzLv8wjySTPm3tfUZ/21CE9E1pJagOA91Pxew== - "mkdirp@>=0.5 0", mkdirp@^0.5.1, mkdirp@^0.5.6, mkdirp@~0.5.1: version "0.5.6" resolved "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz" @@ -8665,13 +8676,6 @@ node-releases@^2.0.14: resolved "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz" integrity sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw== -nopt@1.0.10: - version "1.0.10" - resolved "https://registry.npmjs.org/nopt/-/nopt-1.0.10.tgz" - integrity sha512-NWmpvLSqUrgrAC9HCuxEvb+PSloHpqVu+FqcO4eeF2h5qYRhA7ev6KvelyQAKtegUbC6RypJnlEOhd8vloNKYg== - dependencies: - abbrev "1" - normalize-package-data@^2.3.2, normalize-package-data@^2.3.4: version "2.5.0" resolved "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz" @@ -8716,11 +8720,6 @@ normalize-url@^6.0.1: resolved "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz" integrity sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A== -not@^0.1.0: - version "0.1.0" - resolved "https://registry.npmjs.org/not/-/not-0.1.0.tgz" - integrity sha512-5PDmaAsVfnWUgTUbJ3ERwn7u79Z0dYxN9ErxCpVJJqe2RK0PJ3z+iFUxuqjwtlDDegXvtWoxD/3Fzxox7tFGWA== - npm-conf@^1.1.0: version "1.1.3" resolved "https://registry.npmjs.org/npm-conf/-/npm-conf-1.1.3.tgz" @@ -8755,7 +8754,7 @@ nth-check@^1.0.2, nth-check@~1.0.1: dependencies: boolbase "~1.0.0" -nth-check@^2.0.0, nth-check@^2.0.1: +nth-check@^2.0.1: version "2.1.1" resolved "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz" integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w== @@ -8950,6 +8949,11 @@ p-cancelable@^1.0.0: resolved "https://registry.npmjs.org/p-cancelable/-/p-cancelable-1.1.0.tgz" integrity sha512-s73XxOZ4zpt1edZYZzvhqFa6uvQc1vwUa0K0BdtIZgQMAJj9IbebH+JkgKZc9h+B05PKHLOTl4ajG1BmNrVZlw== +p-debounce@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/p-debounce/-/p-debounce-4.0.0.tgz#348e3f44489baa9435cc7d807f17b3bb2fb16b24" + integrity sha512-4Ispi9I9qYGO4lueiLDhe4q4iK5ERK8reLsuzH6BPaXn53EGaua8H66PXIFGrW897hwjXp+pVLrm/DLxN0RF0A== + p-event@^1.0.0: version "1.3.0" resolved "https://registry.npmjs.org/p-event/-/p-event-1.3.0.tgz" @@ -9070,6 +9074,17 @@ package-json@^6.3.0: registry-url "^5.0.0" semver "^6.2.0" +pagefind@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/pagefind/-/pagefind-1.1.0.tgz#6b758ca9cae28c3776b40db6a3b9478d2286c27b" + integrity sha512-1nmj0/vfYcMxNEQj0YDRp6bTVv9hI7HLdPhK/vBBYlrnwjATndQvHyicj5Y7pUHrpCFZpFnLVQXIF829tpFmaw== + optionalDependencies: + "@pagefind/darwin-arm64" "1.1.0" + "@pagefind/darwin-x64" "1.1.0" + "@pagefind/linux-arm64" "1.1.0" + "@pagefind/linux-x64" "1.1.0" + "@pagefind/windows-x64" "1.1.0" + param-case@^3.0.4: version "3.0.4" resolved "https://registry.npmjs.org/param-case/-/param-case-3.0.4.tgz" @@ -10320,14 +10335,6 @@ react-loadable-ssr-addon-v5-slorber@^1.0.1: 
dependencies: "@babel/runtime" "^7.10.3" -"react-loadable@npm:@docusaurus/react-loadable@5.5.2": - version "5.5.2" - resolved "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz" - integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== - dependencies: - "@types/react" "*" - prop-types "^15.6.2" - react-router-config@^5.1.1: version "5.1.1" resolved "https://registry.npmjs.org/react-router-config/-/react-router-config-5.1.1.tgz" @@ -10572,14 +10579,6 @@ regjsparser@^0.9.1: dependencies: jsesc "~0.5.0" -rehype-parse@^7.0.1: - version "7.0.1" - resolved "https://registry.npmjs.org/rehype-parse/-/rehype-parse-7.0.1.tgz" - integrity sha512-fOiR9a9xH+Le19i4fGzIEowAbwG7idy2Jzs4mOrFWBSJ0sNUgy0ev871dwWnbOo371SjgjG4pwzrbgSVrKxecw== - dependencies: - hast-util-from-parse5 "^6.0.0" - parse5 "^6.0.0" - relateurl@^0.2.7: version "0.2.7" resolved "https://registry.npmjs.org/relateurl/-/relateurl-0.2.7.tgz" @@ -10674,7 +10673,7 @@ repeat-element@^1.1.2: resolved "https://registry.npmjs.org/repeat-element/-/repeat-element-1.1.4.tgz" integrity sha512-LFiNfRcSu7KK3evMyYOuCzv3L10TW7yC1G2/+StMjK8Y6Vqd2MG7r/Qjw4ghtuCOjFvlnms/iMmLqpvW/ES/WQ== -repeat-string@^1.0.0, repeat-string@^1.5.2, repeat-string@^1.5.4, repeat-string@^1.6.1: +repeat-string@^1.5.2, repeat-string@^1.5.4, repeat-string@^1.6.1: version "1.6.1" resolved "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz" integrity sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w== @@ -11536,7 +11535,7 @@ string-template@~0.2.1: resolved "https://registry.npmjs.org/string-template/-/string-template-0.2.1.tgz" integrity sha512-Yptehjogou2xm4UJbxJ4CxgZx12HBfeystp0y3x7s4Dj32ltVVG1Gg8YhKjHZkHicuKpZX/ffilA8505VbUbpw== -"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: +string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: version "4.2.3" resolved "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -11998,14 +11997,6 @@ to-regex@^3.0.1, to-regex@^3.0.2: regex-not "^1.0.2" safe-regex "^1.1.0" -to-vfile@^6.1.0: - version "6.1.0" - resolved "https://registry.npmjs.org/to-vfile/-/to-vfile-6.1.0.tgz" - integrity sha512-BxX8EkCxOAZe+D/ToHdDsJcVI4HqQfmw0tCkp31zf3dNP/XWIAjU4CmeuSwsSoOzOTqHPOL0KUzyZqJplkD0Qw== - dependencies: - is-buffer "^2.0.0" - vfile "^4.0.0" - toidentifier@1.0.1: version "1.0.1" resolved "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz" @@ -12242,7 +12233,7 @@ unified@9.2.0: trough "^1.0.0" vfile "^4.0.0" -unified@^9.0.0, unified@^9.2.2: +unified@^9.2.2: version "9.2.2" resolved "https://registry.npmjs.org/unified/-/unified-9.2.2.tgz" integrity sha512-Sg7j110mtefBD+qunSLO1lqOEKdrwBFBrR6Qd8f4uwkhWNlbkaqwHse6e7QvD3AP/MNoJdEDLaf8OxYyoWgorQ== @@ -12286,19 +12277,12 @@ unist-builder@2.0.3, unist-builder@^2.0.0: resolved "https://registry.npmjs.org/unist-builder/-/unist-builder-2.0.3.tgz" integrity sha512-f98yt5pnlMWlzP539tPc4grGMsFaQQlP/vM396b00jngsiINumNmsY8rkXjfoi1c6QaM8nQ3vaGDuoKWbe/1Uw== -unist-util-find-after@^3.0.0: - version "3.0.0" - resolved "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-3.0.0.tgz" - integrity sha512-ojlBqfsBftYXExNu3+hHLfJQ/X1jYY/9vdm4yZWjIbf0VuWF6CRufci1ZyoD/wV2TYMKxXUoNuoqwy+CkgzAiQ== - 
dependencies: - unist-util-is "^4.0.0" - unist-util-generated@^1.0.0: version "1.1.6" resolved "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-1.1.6.tgz" integrity sha512-cln2Mm1/CZzN5ttGK7vkoGw+RZ8VcUH6BtGbq98DDtRGquAAOXig1mrBQYelOwMXYS8rK+vZDyyojSjp7JX+Lg== -unist-util-is@^4.0.0, unist-util-is@^4.0.2: +unist-util-is@^4.0.0: version "4.1.0" resolved "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.1.0.tgz" integrity sha512-ZOQSsnce92GrxSqlnEEseX0gi7GH9zTJZ0p9dtu87WRb/37mMPO2Ilx1s/t9vBHrFhbgweUwb+t7cIn5dxPhZg== @@ -12804,13 +12788,6 @@ which@^2.0.1: dependencies: isexe "^2.0.0" -wide-align@^1.1.2: - version "1.1.5" - resolved "https://registry.npmjs.org/wide-align/-/wide-align-1.1.5.tgz" - integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg== - dependencies: - string-width "^1.0.2 || 2 || 3 || 4" - widest-line@^3.1.0: version "3.1.0" resolved "https://registry.npmjs.org/widest-line/-/widest-line-3.1.0.tgz" From 823d20101d372add9e9ffebea731bb65e0527f09 Mon Sep 17 00:00:00 2001 From: yujonglee Date: Thu, 25 Jul 2024 22:37:46 +0900 Subject: [PATCH 042/655] eject default UI npm run swizzle @getcanary/docusaurus-pagefind SearchBar -- --eject --danger --- .../my-website/src/theme/SearchBar/Canary.jsx | 42 +++++++++++++++++++ docs/my-website/src/theme/SearchBar/index.jsx | 29 +++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 docs/my-website/src/theme/SearchBar/Canary.jsx create mode 100644 docs/my-website/src/theme/SearchBar/index.jsx diff --git a/docs/my-website/src/theme/SearchBar/Canary.jsx b/docs/my-website/src/theme/SearchBar/Canary.jsx new file mode 100644 index 000000000..9805be67f --- /dev/null +++ b/docs/my-website/src/theme/SearchBar/Canary.jsx @@ -0,0 +1,42 @@ +import React from "react"; + +export default function Canary({ path }) { + const [loaded, setLoaded] = React.useState(false); + + React.useEffect(() => { + Promise.all([ + import("@getcanary/web/components/canary-root"), + import("@getcanary/web/components/canary-provider-pagefind"), + import("@getcanary/web/components/canary-modal"), + import("@getcanary/web/components/canary-trigger-searchbar"), + import("@getcanary/web/components/canary-content"), + import("@getcanary/web/components/canary-search"), + import("@getcanary/web/components/canary-search-input"), + import("@getcanary/web/components/canary-search-results"), + ]) + .then(() => setLoaded(true)) + .catch((e) => + console.error("Maybe you forgot to install '@getcanary/web'?", e), + ); + }, []); + + if (!loaded) { + return null; + } + + return ( + + + + + + + + + + + + + + ); +} diff --git a/docs/my-website/src/theme/SearchBar/index.jsx b/docs/my-website/src/theme/SearchBar/index.jsx new file mode 100644 index 000000000..da018f108 --- /dev/null +++ b/docs/my-website/src/theme/SearchBar/index.jsx @@ -0,0 +1,29 @@ +import React from "react"; + +import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; +import { usePluginData } from "@docusaurus/useGlobalData"; + +import Canary from "./Canary"; + +export default function Index() { + const { siteConfig } = useDocusaurusContext(); + const { options } = usePluginData("docusaurus-plugin-pagefind-canary"); + + const [path, setPath] = React.useState(""); + + React.useEffect(() => { + setPath(`${siteConfig.baseUrl}pagefind/pagefind.js`); + }, [siteConfig]); + + React.useEffect(() => { + for (const [k, v] of Object.entries(options?.styles ?? 
{})) { + document.body.style.setProperty(k, v); + } + }, [options]); + + if (!path) { + return null; + } + + return ; +} From 78de9424a76477efbf9cd0f452dfedf7d2943dc4 Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 10:32:13 +0900 Subject: [PATCH 043/655] customize --- .../my-website/src/theme/SearchBar/Canary.jsx | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/my-website/src/theme/SearchBar/Canary.jsx b/docs/my-website/src/theme/SearchBar/Canary.jsx index 9805be67f..03cec5292 100644 --- a/docs/my-website/src/theme/SearchBar/Canary.jsx +++ b/docs/my-website/src/theme/SearchBar/Canary.jsx @@ -12,7 +12,9 @@ export default function Canary({ path }) { import("@getcanary/web/components/canary-content"), import("@getcanary/web/components/canary-search"), import("@getcanary/web/components/canary-search-input"), - import("@getcanary/web/components/canary-search-results"), + import("@getcanary/web/components/canary-search-results-group"), + import("@getcanary/web/components/canary-callout-calendly"), + import("@getcanary/web/components/canary-callout-discord"), ]) .then(() => setLoaded(true)) .catch((e) => @@ -32,7 +34,22 @@ export default function Canary({ path }) { - + + + From 986352037645d688e5e1c78dc3958ee459a24fae Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 18:48:56 -0700 Subject: [PATCH 044/655] support using */* --- .circleci/config.yml | 2 +- litellm/proxy/proxy_config.yaml | 3 +++ litellm/tests/test_get_llm_provider.py | 5 +++++ litellm/utils.py | 2 ++ proxy_server_config.yaml | 7 +++++++ 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a9a5be671..a29b76110 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -208,7 +208,7 @@ jobs: -e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \ -e MISTRAL_API_KEY=$MISTRAL_API_KEY \ -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \ + -e GROQ_API_KEY=$GROQ_API_KEY \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_REGION_NAME=$AWS_REGION_NAME \ -e AUTO_INFER_REGION=True \ diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 0e3f0826e..9d913b458 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -8,6 +8,9 @@ model_list: litellm_params: model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct api_key: "os.environ/FIREWORKS" + - model_name: "*" + litellm_params: + model: "*" general_settings: master_key: sk-1234 alerting: ["slack"] diff --git a/litellm/tests/test_get_llm_provider.py b/litellm/tests/test_get_llm_provider.py index 3ec867af4..6f53b0f8f 100644 --- a/litellm/tests/test_get_llm_provider.py +++ b/litellm/tests/test_get_llm_provider.py @@ -25,6 +25,11 @@ def test_get_llm_provider(): # test_get_llm_provider() +def test_get_llm_provider_catch_all(): + _, response, _, _ = litellm.get_llm_provider(model="*") + assert response == "openai" + + def test_get_llm_provider_gpt_instruct(): _, response, _, _ = litellm.get_llm_provider(model="gpt-3.5-turbo-instruct-0914") diff --git a/litellm/utils.py b/litellm/utils.py index e104de958..cceed6b9d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4650,6 +4650,8 @@ def get_llm_provider( custom_llm_provider = "openai" elif model in litellm.empower_models: custom_llm_provider = "empower" + elif model == "*": + custom_llm_provider = "openai" if custom_llm_provider is None or custom_llm_provider == "": if litellm.suppress_debug_info == 
False: print() # noqa diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index 5ee7192c8..f7766b65b 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -85,6 +85,13 @@ model_list: litellm_params: model: openai/* api_key: os.environ/OPENAI_API_KEY + + # Pass through all llm requests to litellm.completion/litellm.embedding + # if user passes model="anthropic/claude-3-opus-20240229" proxy will make requests to anthropic claude-3-opus-20240229 using ANTHROPIC_API_KEY + - model_name: "*" + litellm_params: + model: "*" + - model_name: mistral-embed litellm_params: model: mistral/mistral-embed From 05858cb249bdd594a9dd084b6d71bf79b9449199 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 18:54:30 -0700 Subject: [PATCH 045/655] test proxy all model --- tests/test_openai_endpoints.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py index 59ac10552..a77da8d52 100644 --- a/tests/test_openai_endpoints.py +++ b/tests/test_openai_endpoints.py @@ -7,6 +7,9 @@ from openai import OpenAI, AsyncOpenAI from typing import Optional, List, Union +LITELLM_MASTER_KEY = "sk-1234" + + def response_header_check(response): """ - assert if response headers < 4kb (nginx limit). @@ -467,6 +470,22 @@ async def test_openai_wildcard_chat_completion(): await chat_completion(session=session, key=key, model="gpt-3.5-turbo-0125") +@pytest.mark.asyncio +async def test_proxy_all_models(): + """ + - proxy_server_config.yaml has model = * / * + - Make chat completion call + - groq is NOT defined on /models + + + """ + async with aiohttp.ClientSession() as session: + # call chat/completions with a model that the key was not created for + the model is not on the config.yaml + await chat_completion( + session=session, key=LITELLM_MASTER_KEY, model="groq/llama3-8b-8192" + ) + + @pytest.mark.asyncio async def test_batch_chat_completions(): """ From ff0f21a1f35f0bf1c7f5c2372881291b4761defb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:02:22 -0700 Subject: [PATCH 046/655] docs - anthropic --- docs/my-website/docs/providers/anthropic.md | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 496343f87..2227b7a6b 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -82,6 +82,47 @@ model_list: ```bash litellm --config /path/to/config.yaml ``` + + + +Use this if you want to make requests to `claude-3-haiku-20240307`,`claude-3-opus-20240229`,`claude-2.1` without defining them on the config.yaml + +#### Required env variables +``` +ANTHROPIC_API_KEY=sk-ant**** +``` + +```yaml +model_list: + - model_name: "*" + litellm_params: + model: "*" +``` + +```bash +litellm --config /path/to/config.yaml +``` + +Example Request for this config.yaml + +**Ensure you use `anthropic/` prefix to route the request to Anthropic API** + +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--data ' { + "model": "anthropic/claude-3-haiku-20240307", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] + } +' +``` + + From 41abd5124023c931aa7856271d6e5761804358e6 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 19:03:52 -0700 Subject: [PATCH 047/655] fix(custom_llm.py): pass input params to custom llm --- litellm/llms/custom_llm.py | 80 
++++++++++++++++++++++++++-- litellm/main.py | 21 +++++++- litellm/tests/test_custom_llm.py | 91 ++++++++++++++++++++++++++++++-- 3 files changed, 182 insertions(+), 10 deletions(-) diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py index f1b2b28b4..47c5a485c 100644 --- a/litellm/llms/custom_llm.py +++ b/litellm/llms/custom_llm.py @@ -59,16 +59,88 @@ class CustomLLM(BaseLLM): def __init__(self) -> None: super().__init__() - def completion(self, *args, **kwargs) -> ModelResponse: + def completion( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable, + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[HTTPHandler] = None, + ) -> ModelResponse: raise CustomLLMError(status_code=500, message="Not implemented yet!") - def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: + def streaming( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable, + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[HTTPHandler] = None, + ) -> Iterator[GenericStreamingChunk]: raise CustomLLMError(status_code=500, message="Not implemented yet!") - async def acompletion(self, *args, **kwargs) -> ModelResponse: + async def acompletion( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable, + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[AsyncHTTPHandler] = None, + ) -> ModelResponse: raise CustomLLMError(status_code=500, message="Not implemented yet!") - async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: + async def astreaming( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable, + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[AsyncHTTPHandler] = None, + ) -> AsyncIterator[GenericStreamingChunk]: raise CustomLLMError(status_code=500, message="Not implemented yet!") diff --git a/litellm/main.py b/litellm/main.py index c3be01373..672029f69 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2711,8 +2711,27 @@ def completion( async_fn=acompletion, stream=stream, custom_llm=custom_handler ) + headers = headers or litellm.headers + ## CALL FUNCTION - response = handler_fn() + response = handler_fn( + model=model, + messages=messages, + headers=headers, + model_response=model_response, + print_verbose=print_verbose, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn, + timeout=timeout, # type: ignore + custom_prompt_dict=custom_prompt_dict, + client=client, # pass AsyncOpenAI, OpenAI client + encoding=encoding, + ) if stream is 
True: return CustomStreamWrapper( completion_stream=response, diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py index af88b1f3a..a0f8b569e 100644 --- a/litellm/tests/test_custom_llm.py +++ b/litellm/tests/test_custom_llm.py @@ -17,7 +17,16 @@ sys.path.insert( import os from collections import defaultdict from concurrent.futures import ThreadPoolExecutor -from typing import Any, AsyncGenerator, AsyncIterator, Coroutine, Iterator, Union +from typing import ( + Any, + AsyncGenerator, + AsyncIterator, + Callable, + Coroutine, + Iterator, + Optional, + Union, +) from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -94,21 +103,75 @@ class CustomModelResponseIterator: class MyCustomLLM(CustomLLM): - def completion(self, *args, **kwargs) -> litellm.ModelResponse: + def completion( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable[..., Any], + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, openai.Timeout]] = None, + client: Optional[litellm.HTTPHandler] = None, + ) -> ModelResponse: return litellm.completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], mock_response="Hi!", ) # type: ignore - async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: + async def acompletion( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable[..., Any], + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, openai.Timeout]] = None, + client: Optional[litellm.AsyncHTTPHandler] = None, + ) -> litellm.ModelResponse: return litellm.completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], mock_response="Hi!", ) # type: ignore - def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: + def streaming( + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable[..., Any], + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, openai.Timeout]] = None, + client: Optional[litellm.HTTPHandler] = None, + ) -> Iterator[GenericStreamingChunk]: generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, @@ -126,7 +189,25 @@ class MyCustomLLM(CustomLLM): ) return custom_iterator - async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: # type: ignore + async def astreaming( # type: ignore + self, + model: str, + messages: list, + api_base: str, + custom_prompt_dict: dict, + model_response: ModelResponse, + print_verbose: Callable[..., Any], + encoding, + api_key, + logging_obj, + optional_params: dict, + acompletion=None, + litellm_params=None, + logger_fn=None, + headers={}, + timeout: Optional[Union[float, openai.Timeout]] = None, + client: Optional[litellm.AsyncHTTPHandler] = None, + ) -> AsyncIterator[GenericStreamingChunk]: # type: ignore generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, From af1cd9e06f900e8bba35d4083847cf2e2b97f60a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: 
Thu, 25 Jul 2024 19:17:20 -0700 Subject: [PATCH 048/655] docs on pass through support --- docs/my-website/docs/proxy/configs.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index ecd82375e..cb0841c60 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -59,6 +59,13 @@ model_list: rpm: 1440 model_info: version: 2 + + # Use this if you want to make requests to `claude-3-haiku-20240307`,`claude-3-opus-20240229`,`claude-2.1` without defining them on the config.yaml + # Default models + # Works for ALL Providers and needs the default provider credentials in .env + - model_name: "*" + litellm_params: + model: "*" litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py drop_params: True From fcd834b2775c3a8531a57c66b1e9e8847741cfc1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:22:26 -0700 Subject: [PATCH 049/655] fix logfire - don't load_dotenv --- litellm/integrations/logfire_logger.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/litellm/integrations/logfire_logger.py b/litellm/integrations/logfire_logger.py index fa4ab7bd5..5e9267dca 100644 --- a/litellm/integrations/logfire_logger.py +++ b/litellm/integrations/logfire_logger.py @@ -2,10 +2,6 @@ # On success + failure, log events to Logfire import os - -import dotenv - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import uuid from enum import Enum From 693bcfac39d6f930e0dc14a2d233bb158f2ac65e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:32:49 -0700 Subject: [PATCH 050/655] fix using pass_through_all_models --- litellm/proxy/proxy_config.yaml | 4 ++++ litellm/proxy/proxy_server.py | 33 ++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 9d913b458..bb256c49b 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -11,6 +11,10 @@ model_list: - model_name: "*" litellm_params: model: "*" + - model_name: "*" + litellm_params: + model: openai/* + api_key: os.environ/OPENAI_API_KEY general_settings: master_key: sk-1234 alerting: ["slack"] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 022bb3040..1c9a36912 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2882,14 +2882,15 @@ async def chat_completion( elif ( llm_router is not None and data["model"] not in router_model_names - and llm_router.default_deployment is not None - ): # model in router deployments, calling a specific deployment on the router - tasks.append(llm_router.acompletion(**data)) - elif ( - llm_router is not None and llm_router.router_general_settings.pass_through_all_models is True ): tasks.append(litellm.acompletion(**data)) + elif ( + llm_router is not None + and data["model"] not in router_model_names + and llm_router.default_deployment is not None + ): # model in router deployments, calling a specific deployment on the router + tasks.append(llm_router.acompletion(**data)) elif user_model is not None: # `litellm --model ` tasks.append(litellm.acompletion(**data)) else: @@ -3144,6 +3145,12 @@ async def completion( llm_router is not None and data["model"] in llm_router.get_model_ids() ): # model in router model list llm_response = asyncio.create_task(llm_router.atext_completion(**data)) + elif ( + 
llm_router is not None + and data["model"] not in router_model_names + and llm_router.router_general_settings.pass_through_all_models is True + ): + llm_response = asyncio.create_task(litellm.atext_completion(**data)) elif ( llm_router is not None and data["model"] not in router_model_names @@ -3152,11 +3159,6 @@ async def completion( llm_response = asyncio.create_task(llm_router.atext_completion(**data)) elif user_model is not None: # `litellm --model ` llm_response = asyncio.create_task(litellm.atext_completion(**data)) - elif ( - llm_router is not None - and llm_router.router_general_settings.pass_through_all_models is True - ): - llm_response = asyncio.create_task(litellm.atext_completion(**data)) else: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -3412,14 +3414,15 @@ async def embeddings( elif ( llm_router is not None and data["model"] not in router_model_names - and llm_router.default_deployment is not None - ): # model in router deployments, calling a specific deployment on the router - tasks.append(llm_router.aembedding(**data)) - elif ( - llm_router is not None and llm_router.router_general_settings.pass_through_all_models is True ): tasks.append(litellm.aembedding(**data)) + elif ( + llm_router is not None + and data["model"] not in router_model_names + and llm_router.default_deployment is not None + ): # model in router deployments, calling a specific deployment on the router + tasks.append(llm_router.aembedding(**data)) elif user_model is not None: # `litellm --model ` tasks.append(litellm.aembedding(**data)) else: From bb6f72b315b2bb66140cec6e6b24eab61b35ab1a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:47:54 -0700 Subject: [PATCH 051/655] add mistral sdk usage --- docs/my-website/docs/proxy/quick_start.md | 159 ------------------- docs/my-website/docs/proxy/user_keys.md | 180 ++++++++++++++++++++++ 2 files changed, 180 insertions(+), 159 deletions(-) diff --git a/docs/my-website/docs/proxy/quick_start.md b/docs/my-website/docs/proxy/quick_start.md index 4ee4d8831..31eb52141 100644 --- a/docs/my-website/docs/proxy/quick_start.md +++ b/docs/my-website/docs/proxy/quick_start.md @@ -396,165 +396,6 @@ print(response) - POST `/key/generate` - generate a key to access the proxy -## Using with OpenAI compatible projects -Set `base_url` to the LiteLLM Proxy server - - - - -```python -import openai -client = openai.OpenAI( - api_key="anything", - base_url="http://0.0.0.0:4000" -) - -# request sent to model set on litellm proxy, `litellm --model` -response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ - { - "role": "user", - "content": "this is a test request, write a short poem" - } -]) - -print(response) - -``` - - - -#### Start the LiteLLM proxy -```shell -litellm --model gpt-3.5-turbo - -#INFO: Proxy running on http://0.0.0.0:4000 -``` - -#### 1. Clone the repo - -```shell -git clone https://github.com/danny-avila/LibreChat.git -``` - - -#### 2. Modify Librechat's `docker-compose.yml` -LiteLLM Proxy is running on port `4000`, set `4000` as the proxy below -```yaml -OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions -``` - -#### 3. Save fake OpenAI key in Librechat's `.env` - -Copy Librechat's `.env.example` to `.env` and overwrite the default OPENAI_API_KEY (by default it requires the user to pass a key). -```env -OPENAI_API_KEY=sk-1234 -``` - -#### 4. Run LibreChat: -```shell -docker compose up -``` - - - - -Continue-Dev brings ChatGPT to VSCode. 
See how to [install it here](https://continue.dev/docs/quickstart). - -In the [config.py](https://continue.dev/docs/reference/Models/openai) set this as your default model. -```python - default=OpenAI( - api_key="IGNORED", - model="fake-model-name", - context_length=2048, # customize if needed for your model - api_base="http://localhost:4000" # your proxy server url - ), -``` - -Credits [@vividfog](https://github.com/ollama/ollama/issues/305#issuecomment-1751848077) for this tutorial. - - - - -```shell -$ pip install aider - -$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key -``` - - - -```python -pip install pyautogen -``` - -```python -from autogen import AssistantAgent, UserProxyAgent, oai -config_list=[ - { - "model": "my-fake-model", - "api_base": "http://localhost:4000", #litellm compatible endpoint - "api_type": "open_ai", - "api_key": "NULL", # just a placeholder - } -] - -response = oai.Completion.create(config_list=config_list, prompt="Hi") -print(response) # works fine - -llm_config={ - "config_list": config_list, -} - -assistant = AssistantAgent("assistant", llm_config=llm_config) -user_proxy = UserProxyAgent("user_proxy") -user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stock price change YTD.", config_list=config_list) -``` - -Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial. - - - -A guidance language for controlling large language models. -https://github.com/guidance-ai/guidance - -**NOTE:** Guidance sends additional params like `stop_sequences` which can cause some models to fail if they don't support it. - -**Fix**: Start your proxy using the `--drop_params` flag - -```shell -litellm --model ollama/codellama --temperature 0.3 --max_tokens 2048 --drop_params -``` - -```python -import guidance - -# set api_base to your proxy -# set api_key to anything -gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything") - -experts = guidance(''' -{{#system~}} -You are a helpful and terse assistant. -{{~/system}} - -{{#user~}} -I want a response to the following question: -{{query}} -Name 3 world-class experts (past or present) who would be great at answering this? -Don't answer the question yet. 
-{{~/user}} - -{{#assistant~}} -{{gen 'expert_names' temperature=0 max_tokens=300}} -{{~/assistant}} -''', llm=gpt4) - -result = experts(query='How can I be more productive?') -print(result) -``` - - - ## Debugging Proxy Events that occur during normal operation diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 00fb3526b..5e57c18b1 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -234,6 +234,26 @@ main(); ``` + + +```python +import os +from mistralai.client import MistralClient +from mistralai.models.chat_completion import ChatMessage + + +client = MistralClient(api_key="sk-1234", endpoint="http://0.0.0.0:4000") +chat_response = client.chat( + model="mistral-small-latest", + messages=[ + {"role": "user", "content": "this is a test request, write a short poem"} + ], +) +print(chat_response.choices[0].message.content) +``` + + + ```python @@ -566,6 +586,166 @@ curl --location 'http://0.0.0.0:4000/moderations' \ ``` +## Using with OpenAI compatible projects +Set `base_url` to the LiteLLM Proxy server + + + + +```python +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +]) + +print(response) + +``` + + + +#### Start the LiteLLM proxy +```shell +litellm --model gpt-3.5-turbo + +#INFO: Proxy running on http://0.0.0.0:4000 +``` + +#### 1. Clone the repo + +```shell +git clone https://github.com/danny-avila/LibreChat.git +``` + + +#### 2. Modify Librechat's `docker-compose.yml` +LiteLLM Proxy is running on port `4000`, set `4000` as the proxy below +```yaml +OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions +``` + +#### 3. Save fake OpenAI key in Librechat's `.env` + +Copy Librechat's `.env.example` to `.env` and overwrite the default OPENAI_API_KEY (by default it requires the user to pass a key). +```env +OPENAI_API_KEY=sk-1234 +``` + +#### 4. Run LibreChat: +```shell +docker compose up +``` + + + + +Continue-Dev brings ChatGPT to VSCode. See how to [install it here](https://continue.dev/docs/quickstart). + +In the [config.py](https://continue.dev/docs/reference/Models/openai) set this as your default model. +```python + default=OpenAI( + api_key="IGNORED", + model="fake-model-name", + context_length=2048, # customize if needed for your model + api_base="http://localhost:4000" # your proxy server url + ), +``` + +Credits [@vividfog](https://github.com/ollama/ollama/issues/305#issuecomment-1751848077) for this tutorial. 
+ + + + +```shell +$ pip install aider + +$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key +``` + + + +```python +pip install pyautogen +``` + +```python +from autogen import AssistantAgent, UserProxyAgent, oai +config_list=[ + { + "model": "my-fake-model", + "api_base": "http://localhost:4000", #litellm compatible endpoint + "api_type": "open_ai", + "api_key": "NULL", # just a placeholder + } +] + +response = oai.Completion.create(config_list=config_list, prompt="Hi") +print(response) # works fine + +llm_config={ + "config_list": config_list, +} + +assistant = AssistantAgent("assistant", llm_config=llm_config) +user_proxy = UserProxyAgent("user_proxy") +user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stock price change YTD.", config_list=config_list) +``` + +Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial. + + + +A guidance language for controlling large language models. +https://github.com/guidance-ai/guidance + +**NOTE:** Guidance sends additional params like `stop_sequences` which can cause some models to fail if they don't support it. + +**Fix**: Start your proxy using the `--drop_params` flag + +```shell +litellm --model ollama/codellama --temperature 0.3 --max_tokens 2048 --drop_params +``` + +```python +import guidance + +# set api_base to your proxy +# set api_key to anything +gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything") + +experts = guidance(''' +{{#system~}} +You are a helpful and terse assistant. +{{~/system}} + +{{#user~}} +I want a response to the following question: +{{query}} +Name 3 world-class experts (past or present) who would be great at answering this? +Don't answer the question yet. 
+{{~/user}} + +{{#assistant~}} +{{gen 'expert_names' temperature=0 max_tokens=300}} +{{~/assistant}} +''', llm=gpt4) + +result = experts(query='How can I be more productive?') +print(result) +``` + + + + ## Advanced ### (BETA) Batch Completions - pass multiple models From 68e94f097678f3a32fcd875f8e34a23b2357ed24 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:48:54 -0700 Subject: [PATCH 052/655] example mistral sdk --- litellm/proxy/proxy_config.yaml | 4 ++++ litellm/proxy/tests/test_mistral_sdk.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 litellm/proxy/tests/test_mistral_sdk.py diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index bd8f5bfd0..8dc03d6e0 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -8,6 +8,10 @@ model_list: litellm_params: model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct api_key: "os.environ/FIREWORKS" + - model_name: mistral-small-latest + litellm_params: + model: mistral/mistral-small-latest + api_key: "os.environ/MISTRAL_API_KEY" - model_name: tts litellm_params: model: openai/tts-1 diff --git a/litellm/proxy/tests/test_mistral_sdk.py b/litellm/proxy/tests/test_mistral_sdk.py new file mode 100644 index 000000000..0adc67b93 --- /dev/null +++ b/litellm/proxy/tests/test_mistral_sdk.py @@ -0,0 +1,13 @@ +import os + +from mistralai.client import MistralClient +from mistralai.models.chat_completion import ChatMessage + +client = MistralClient(api_key="sk-1234", endpoint="http://0.0.0.0:4000") +chat_response = client.chat( + model="mistral-small-latest", + messages=[ + {"role": "user", "content": "this is a test request, write a short poem"} + ], +) +print(chat_response.choices[0].message.content) From a2fd8459fc59a670d9c2302d2d3518934da4b7a8 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 19:50:07 -0700 Subject: [PATCH 053/655] fix(utils.py): don't raise error on openai content filter during streaming - return as is Fixes issue where we would raise an error vs. openai who return the chunk with finish reason as 'content_filter' --- litellm/tests/test_streaming.py | 50 +++++++++++++++++++++++++++++++++ litellm/utils.py | 15 ---------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 768c8752c..9aebc0f24 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -3248,6 +3248,56 @@ def test_unit_test_custom_stream_wrapper(): assert freq == 1 +def test_unit_test_custom_stream_wrapper_openai(): + """ + Test if last streaming chunk ends with '?', if the message repeats itself. 
+ """ + litellm.set_verbose = False + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + delta=litellm.utils.Delta( + content=None, function_call=None, role=None, tool_calls=None + ), + finish_reason="content_filter", + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo-0613", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + + completion_stream = ModelResponseIterator(model_response=chunk) + + response = litellm.CustomStreamWrapper( + completion_stream=completion_stream, + model="gpt-3.5-turbo", + custom_llm_provider="azure", + logging_obj=litellm.Logging( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey"}], + stream=True, + call_type="completion", + start_time=time.time(), + litellm_call_id="12345", + function_id="1245", + ), + ) + + stream_finish_reason: Optional[str] = None + for chunk in response: + assert chunk.choices[0].delta.content is None + if chunk.choices[0].finish_reason is not None: + stream_finish_reason = chunk.choices[0].finish_reason + assert stream_finish_reason == "content_filter" + + def test_aamazing_unit_test_custom_stream_wrapper_n(): """ Test if the translated output maps exactly to the received openai input diff --git a/litellm/utils.py b/litellm/utils.py index 5e4dc4479..87f50f5ed 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8840,21 +8840,6 @@ class CustomStreamWrapper: if str_line.choices[0].finish_reason: is_finished = True finish_reason = str_line.choices[0].finish_reason - if finish_reason == "content_filter": - if hasattr(str_line.choices[0], "content_filter_result"): - error_message = json.dumps( - str_line.choices[0].content_filter_result - ) - else: - error_message = "{} Response={}".format( - self.custom_llm_provider, str(dict(str_line)) - ) - - raise litellm.ContentPolicyViolationError( - message=error_message, - llm_provider=self.custom_llm_provider, - model=self.model, - ) # checking for logprobs if ( From 646b2d50f9f32686f699d31a0397a95659564f81 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 19:52:53 -0700 Subject: [PATCH 054/655] docs -quick start --- docs/my-website/docs/proxy/quick_start.md | 6 ++++++ docs/my-website/docs/proxy/user_keys.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/quick_start.md b/docs/my-website/docs/proxy/quick_start.md index 31eb52141..21698bd49 100644 --- a/docs/my-website/docs/proxy/quick_start.md +++ b/docs/my-website/docs/proxy/quick_start.md @@ -255,6 +255,12 @@ litellm --config your_config.yaml ## Using LiteLLM Proxy - Curl Request, OpenAI Package, Langchain +:::info +LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, Mistral SDK, LLamaIndex, Langchain (Js, Python) + +[More examples here](user_keys) +::: + diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 5e57c18b1..44e1c8842 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl +# 💡 Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl :::info From 826bb125e80d6e27678cc88a45ee0bde71125dd9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 19:54:40 -0700 Subject: [PATCH 055/655] 
test(test_router.py): handle azure api instability --- litellm/tests/test_router.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 86506a589..715ba10d5 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -1117,6 +1117,8 @@ async def test_aimg_gen_on_router(): assert len(response.data) > 0 router.reset() + except litellm.InternalServerError as e: + pass except Exception as e: if "Your task failed as a result of our safety system." in str(e): pass From c2e309baf36ebe6abcd4b747cade6d637edf7fe6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 20:05:28 -0700 Subject: [PATCH 056/655] docs using litellm proxy --- docs/my-website/docs/proxy/quick_start.md | 28 +++++++++++++++++++++++ docs/my-website/docs/proxy/user_keys.md | 28 +++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/docs/my-website/docs/proxy/quick_start.md b/docs/my-website/docs/proxy/quick_start.md index 21698bd49..9da860b0d 100644 --- a/docs/my-website/docs/proxy/quick_start.md +++ b/docs/my-website/docs/proxy/quick_start.md @@ -388,6 +388,34 @@ print(response) ``` + + + +```python +import os + +from anthropic import Anthropic + +client = Anthropic( + base_url="http://localhost:4000", # proxy endpoint + api_key="sk-s4xN1IiLTCytwtZFJaYQrA", # litellm proxy virtual key +) + +message = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-opus-20240229", +) +print(message.content) +``` + + + [**More Info**](./configs.md) diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 44e1c8842..7417ef6bd 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -234,6 +234,34 @@ main(); ``` + + + +```python +import os + +from anthropic import Anthropic + +client = Anthropic( + base_url="http://localhost:4000", # proxy endpoint + api_key="sk-s4xN1IiLTCytwtZFJaYQrA", # litellm proxy virtual key +) + +message = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-opus-20240229", +) +print(message.content) +``` + + + ```python From a540c2373073b0b7ad86cefa93ecb9c4e55c4f50 Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 12:08:46 +0900 Subject: [PATCH 057/655] improvements --- docs/my-website/package.json | 2 +- docs/my-website/src/theme/SearchBar/Canary.jsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/my-website/package.json b/docs/my-website/package.json index 9b955a7ad..734da6b79 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -19,7 +19,7 @@ "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", "@getcanary/docusaurus-pagefind": "^0.0.7", - "@getcanary/web": "^0.0.46", + "@getcanary/web": "^0.0.47", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", diff --git a/docs/my-website/src/theme/SearchBar/Canary.jsx b/docs/my-website/src/theme/SearchBar/Canary.jsx index 03cec5292..10bd1f7b7 100644 --- a/docs/my-website/src/theme/SearchBar/Canary.jsx +++ b/docs/my-website/src/theme/SearchBar/Canary.jsx @@ -36,7 +36,7 @@ export default function Canary({ path }) { Date: Thu, 25 Jul 2024 20:09:49 -0700 Subject: [PATCH 058/655] deploy link to using litellm --- docs/my-website/docs/proxy/deploy.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md index ff575f0d4..e8bc432b8 100644 --- a/docs/my-website/docs/proxy/deploy.md +++ b/docs/my-website/docs/proxy/deploy.md @@ -254,6 +254,15 @@ Your OpenAI proxy server is now running on `http://127.0.0.1:4000`. **That's it ! That's the quick start to deploy litellm** +## Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl + +:::info +💡 Go here 👉 [to make your first LLM API Request](user_keys) + +LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, Mistral SDK, LLamaIndex, Langchain (Js, Python) + +::: + ## Options to deploy LiteLLM | Docs | When to Use | From 50bf488b58f790d191adfd53963b603ebc216bf4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 20:10:02 -0700 Subject: [PATCH 059/655] read me link to using litellm --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3ac5f0285..f36f189f3 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,10 @@ $ litellm --model huggingface/bigcode/starcoder ### Step 2: Make ChatCompletions Request to Proxy + +> [!IMPORTANT] +> [Use with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/migration) + ```python import openai # openai v1.0.0+ client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url From b6bcb7eb3cfb4493a43f0757eb9ac2d72ffec63f Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 12:10:05 +0900 Subject: [PATCH 060/655] update lock --- docs/my-website/yarn.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index 28fe315c6..15f2825fd 100644 --- a/docs/my-website/yarn.lock +++ b/docs/my-website/yarn.lock @@ -1970,10 +1970,10 @@ micromatch "^4.0.7" pagefind "^1.1.0" -"@getcanary/web@^0.0.46": - version "0.0.46" - resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.46.tgz#98bdf0576665f03a2d3f645458c2a286f9243e3a" - integrity sha512-PdI/jiKzBX6OashQxWL+kBvHRL+0ciK9ei6OE6Poukz1/P+W4We6TXXPqsjbrWryPg2l6sammJY04clFyPIqiQ== +"@getcanary/web@^0.0.47": + version "0.0.47" + resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.47.tgz#b2e8f843b65094208da2c92b8cf14ac91b0e1d95" + integrity sha512-ITNbhXZAcOffvDVCJWkCkQvXxSprA1OTOHn7AJa/efq09nQZIrI/YaluGhVf51g2jkUV0ixBmMnshWy0g5VG6w== dependencies: "@floating-ui/dom" "^1.6.8" "@lit-labs/observers" "^2.0.2" From 4bf9681df4ef2cd1108ff41b56b97cd1b524d5b4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 20:12:32 -0700 Subject: [PATCH 061/655] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f36f189f3..306f07ec2 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ $ litellm --model huggingface/bigcode/starcoder > [!IMPORTANT] -> [Use with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/migration) +> 💡 [Use LiteLLM Proxy with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/proxy/user_keys) ```python import openai # openai v1.0.0+ From 2626cc6d305aad110965a704d0e21cd41c125027 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Jul 2024 20:16:05 -0700 Subject: [PATCH 062/655] =?UTF-8?q?bump:=20version=201.42.1=20=E2=86=92=20?= 
=?UTF-8?q?1.42.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 08a41c9ec..dfaa75125 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.1" +version = "1.42.2" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.1" +version = "1.42.2" version_files = [ "pyproject.toml:^version" ] From 2f773d9cb6388c6e1dcd7a742101ecd17506181b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 22:11:32 -0700 Subject: [PATCH 063/655] fix(litellm_cost_calc/google.py): support meta llama vertex ai cost tracking --- litellm/litellm_core_utils/llm_cost_calc/google.py | 2 +- litellm/proxy/_new_secret_config.yaml | 11 ++--------- litellm/tests/test_amazing_vertex_completion.py | 9 ++++++++- litellm/tests/test_completion_cost.py | 11 +++++++++++ litellm/utils.py | 3 +++ 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/litellm/litellm_core_utils/llm_cost_calc/google.py b/litellm/litellm_core_utils/llm_cost_calc/google.py index 76da0da51..26eeb7b7a 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/google.py +++ b/litellm/litellm_core_utils/llm_cost_calc/google.py @@ -44,7 +44,7 @@ def cost_router( Returns - str, the specific google cost calc function it should route to. """ - if custom_llm_provider == "vertex_ai" and "claude" in model: + if custom_llm_provider == "vertex_ai" and ("claude" in model or "llama" in model): return "cost_per_token" elif custom_llm_provider == "gemini": return "cost_per_token" diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 173624c25..f4a89cc3a 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,11 +1,4 @@ model_list: - - model_name: "test-model" + - model_name: "gpt-3.5-turbo" litellm_params: - model: "openai/text-embedding-ada-002" - - model_name: "my-custom-model" - litellm_params: - model: "my-custom-llm/my-model" - -litellm_settings: - custom_provider_map: - - {"provider": "my-custom-llm", "custom_handler": custom_handler.my_custom_llm} + model: "openai/gpt-3.5-turbo" diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index b9762afcb..aa0ea471a 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -901,7 +901,12 @@ from litellm.tests.test_completion import response_format_tests @pytest.mark.parametrize( "model", ["vertex_ai/meta/llama3-405b-instruct-maas"] ) # "vertex_ai", -@pytest.mark.parametrize("sync_mode", [True, False]) # "vertex_ai", +@pytest.mark.parametrize( + "sync_mode", + [ + True, + ], +) # False @pytest.mark.asyncio async def test_llama_3_httpx(model, sync_mode): try: @@ -932,6 +937,8 @@ async def test_llama_3_httpx(model, sync_mode): response_format_tests(response=response) print(f"response: {response}") + + assert False except litellm.RateLimitError as e: pass except Exception as e: diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index 289e200d9..41448bd56 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -907,6 
+907,17 @@ def test_vertex_ai_gemini_predict_cost(): assert predictive_cost > 0 +def test_vertex_ai_llama_predict_cost(): + model = "meta/llama3-405b-instruct-maas" + messages = [{"role": "user", "content": "Hey, hows it going???"}] + custom_llm_provider = "vertex_ai" + predictive_cost = completion_cost( + model=model, messages=messages, custom_llm_provider=custom_llm_provider + ) + + assert predictive_cost == 0 + + @pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"]) def test_completion_cost_tts(model): os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" diff --git a/litellm/utils.py b/litellm/utils.py index eecc704b7..7c22953bc 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4919,6 +4919,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod azure_llms = litellm.azure_llms if model in azure_llms: model = azure_llms[model] + if custom_llm_provider is not None and custom_llm_provider == "vertex_ai": + if "meta/" + model in litellm.vertex_llama3_models: + model = "meta/" + model ########################## if custom_llm_provider is None: # Get custom_llm_provider From 0ce5a7962ebca407d9dadb1bd2d0a2ad1b2f23a7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 22:18:17 -0700 Subject: [PATCH 064/655] =?UTF-8?q?bump:=20version=201.42.2=20=E2=86=92=20?= =?UTF-8?q?1.42.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dfaa75125..3bc808e7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.2" +version = "1.42.3" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.2" +version = "1.42.3" version_files = [ "pyproject.toml:^version" ] From ce210ddaf638d9022d6e8b8f0a6d0781da20b2df Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 22:30:55 -0700 Subject: [PATCH 065/655] fix(vertex_ai_llama3.py): Fix llama3 streaming issue Closes https://github.com/BerriAI/litellm/issues/4885 --- litellm/llms/vertex_ai_llama.py | 3 ++- litellm/tests/test_amazing_vertex_completion.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/litellm/llms/vertex_ai_llama.py b/litellm/llms/vertex_ai_llama.py index f33c127f7..cc4786c4b 100644 --- a/litellm/llms/vertex_ai_llama.py +++ b/litellm/llms/vertex_ai_llama.py @@ -103,7 +103,8 @@ class VertexAILlama3Config: for param, value in non_default_params.items(): if param == "max_tokens": optional_params["max_tokens"] = value - + if param == "stream": + optional_params["stream"] = value return optional_params diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index aa0ea471a..bebe5d031 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -937,8 +937,6 @@ async def test_llama_3_httpx(model, sync_mode): response_format_tests(response=response) print(f"response: {response}") - - assert False except litellm.RateLimitError as e: pass except Exception as e: From afcad9e12cecdc77fac9e0aa4b9a82891a551b18 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 25 Jul 2024 22:45:03 -0700 Subject: [PATCH 066/655] docs(custom_llm_server.md): cleanup docs 
--- docs/my-website/docs/providers/custom_llm_server.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/my-website/docs/providers/custom_llm_server.md b/docs/my-website/docs/providers/custom_llm_server.md index 70fc4cea5..874f485d7 100644 --- a/docs/my-website/docs/providers/custom_llm_server.md +++ b/docs/my-website/docs/providers/custom_llm_server.md @@ -1,7 +1,6 @@ # Custom API Server (Custom Format) -LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format - +Call your custom torch-serve / internal LLM APIs via LiteLLM :::info From 3967007595fb7641f740ce71436369e4ba9dc15f Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 21:06:53 +0900 Subject: [PATCH 067/655] update to latest --- docs/my-website/docusaurus.config.js | 8 +++ docs/my-website/package.json | 4 +- .../my-website/src/theme/SearchBar/Canary.jsx | 59 ------------------- docs/my-website/src/theme/SearchBar/index.jsx | 29 --------- docs/my-website/yarn.lock | 22 +++---- 5 files changed, 18 insertions(+), 104 deletions(-) delete mode 100644 docs/my-website/src/theme/SearchBar/Canary.jsx delete mode 100644 docs/my-website/src/theme/SearchBar/index.jsx diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index 4fb1c51c2..7974f613c 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -35,6 +35,14 @@ const config = { "--canary-color-primary-c": 0.1, "--canary-color-primary-h": 270, }, + pagefind: { + ranking: { + pageLength: 0.9, + termFrequency: 1.0, + termSimilarity: 1.0, + termSaturation: 1.5, + } + } }, ], [ diff --git a/docs/my-website/package.json b/docs/my-website/package.json index 734da6b79..7628a862f 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -18,8 +18,8 @@ "@docusaurus/plugin-google-gtag": "^2.4.1", "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", - "@getcanary/docusaurus-pagefind": "^0.0.7", - "@getcanary/web": "^0.0.47", + "@getcanary/docusaurus-pagefind": "^0.0.11", + "@getcanary/web": "^0.0.53", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", diff --git a/docs/my-website/src/theme/SearchBar/Canary.jsx b/docs/my-website/src/theme/SearchBar/Canary.jsx deleted file mode 100644 index 10bd1f7b7..000000000 --- a/docs/my-website/src/theme/SearchBar/Canary.jsx +++ /dev/null @@ -1,59 +0,0 @@ -import React from "react"; - -export default function Canary({ path }) { - const [loaded, setLoaded] = React.useState(false); - - React.useEffect(() => { - Promise.all([ - import("@getcanary/web/components/canary-root"), - import("@getcanary/web/components/canary-provider-pagefind"), - import("@getcanary/web/components/canary-modal"), - import("@getcanary/web/components/canary-trigger-searchbar"), - import("@getcanary/web/components/canary-content"), - import("@getcanary/web/components/canary-search"), - import("@getcanary/web/components/canary-search-input"), - import("@getcanary/web/components/canary-search-results-group"), - import("@getcanary/web/components/canary-callout-calendly"), - import("@getcanary/web/components/canary-callout-discord"), - ]) - .then(() => setLoaded(true)) - .catch((e) => - console.error("Maybe you forgot to install '@getcanary/web'?", e), - ); - }, []); - - if (!loaded) { - return null; - } - - return ( - - - - - - - - - - - - - - - - ); -} diff --git a/docs/my-website/src/theme/SearchBar/index.jsx b/docs/my-website/src/theme/SearchBar/index.jsx deleted file mode 100644 index 
da018f108..000000000 --- a/docs/my-website/src/theme/SearchBar/index.jsx +++ /dev/null @@ -1,29 +0,0 @@ -import React from "react"; - -import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; -import { usePluginData } from "@docusaurus/useGlobalData"; - -import Canary from "./Canary"; - -export default function Index() { - const { siteConfig } = useDocusaurusContext(); - const { options } = usePluginData("docusaurus-plugin-pagefind-canary"); - - const [path, setPath] = React.useState(""); - - React.useEffect(() => { - setPath(`${siteConfig.baseUrl}pagefind/pagefind.js`); - }, [siteConfig]); - - React.useEffect(() => { - for (const [k, v] of Object.entries(options?.styles ?? {})) { - document.body.style.setProperty(k, v); - } - }, [options]); - - if (!path) { - return null; - } - - return ; -} diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index 15f2825fd..c00d4c511 100644 --- a/docs/my-website/yarn.lock +++ b/docs/my-website/yarn.lock @@ -1961,19 +1961,19 @@ resolved "https://registry.yarnpkg.com/@floating-ui/utils/-/utils-0.2.5.tgz#105c37d9d9620ce69b7f692a20c821bf1ad2cbf9" integrity sha512-sTcG+QZ6fdEUObICavU+aB3Mp8HY4n14wYHdxK4fXjPmv3PXZZeY5RaguJmGyeH/CJQhX3fqKUtS4qc1LoHwhQ== -"@getcanary/docusaurus-pagefind@^0.0.7": - version "0.0.7" - resolved "https://registry.yarnpkg.com/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.7.tgz#8a086891a456f5e333d43216bd5cf991718de106" - integrity sha512-1VDAF3/xfigsR6Tj6sQKt/OcCzETIIhhVSbhicBlxbhYY98j8jk3EyE9uxWgGg+lYz+Q+/Xmq+7tvsqBlKreeg== +"@getcanary/docusaurus-pagefind@^0.0.11": + version "0.0.11" + resolved "https://registry.yarnpkg.com/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.11.tgz#c4938b4f3d0f99c4d46d9b11a8800934f2bd7009" + integrity sha512-CN6nI8I5mdvE4Lt0+T95HNQdH8x6P4b2/T2YWbtjP0EB4TZl78lpuWXB3RZwiY7cY+C+aRL4Jo52SAqrBW2eqQ== dependencies: cli-progress "^3.12.0" micromatch "^4.0.7" pagefind "^1.1.0" -"@getcanary/web@^0.0.47": - version "0.0.47" - resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.47.tgz#b2e8f843b65094208da2c92b8cf14ac91b0e1d95" - integrity sha512-ITNbhXZAcOffvDVCJWkCkQvXxSprA1OTOHn7AJa/efq09nQZIrI/YaluGhVf51g2jkUV0ixBmMnshWy0g5VG6w== +"@getcanary/web@^0.0.53": + version "0.0.53" + resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.53.tgz#f842b054465bca484e5f0df184e12248106a1234" + integrity sha512-/vlzQJlF5J6smAFnND+pu91IN26w3sbLh8IA0YaKC/MQJr8ubh74vEgLS5WrwwjBUCEXYweQdPIOz2cbA/64Zw== dependencies: "@floating-ui/dom" "^1.6.8" "@lit-labs/observers" "^2.0.2" @@ -1982,7 +1982,6 @@ highlight.js "^11.10.0" lit "^3.1.4" marked "^13.0.2" - p-debounce "^4.0.0" "@hapi/hoek@^9.0.0": version "9.3.0" @@ -8949,11 +8948,6 @@ p-cancelable@^1.0.0: resolved "https://registry.npmjs.org/p-cancelable/-/p-cancelable-1.1.0.tgz" integrity sha512-s73XxOZ4zpt1edZYZzvhqFa6uvQc1vwUa0K0BdtIZgQMAJj9IbebH+JkgKZc9h+B05PKHLOTl4ajG1BmNrVZlw== -p-debounce@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/p-debounce/-/p-debounce-4.0.0.tgz#348e3f44489baa9435cc7d807f17b3bb2fb16b24" - integrity sha512-4Ispi9I9qYGO4lueiLDhe4q4iK5ERK8reLsuzH6BPaXn53EGaua8H66PXIFGrW897hwjXp+pVLrm/DLxN0RF0A== - p-event@^1.0.0: version "1.3.0" resolved "https://registry.npmjs.org/p-event/-/p-event-1.3.0.tgz" From c54f23f936e45da59c20bf61f20e2f9a89c31f9f Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 21:46:36 +0900 Subject: [PATCH 068/655] wrap existing search bar --- docs/my-website/docusaurus.config.js | 1 + docs/my-website/package.json | 2 +- 
docs/my-website/src/theme/SearchBar.js | 84 ++++++++++++++++++++++++++ docs/my-website/yarn.lock | 8 +-- 4 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 docs/my-website/src/theme/SearchBar.js diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index 7974f613c..0ae35686c 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -31,6 +31,7 @@ const config = { [ require.resolve("@getcanary/docusaurus-pagefind"), { + indexOnly: true, styles: { "--canary-color-primary-c": 0.1, "--canary-color-primary-h": 270, diff --git a/docs/my-website/package.json b/docs/my-website/package.json index 7628a862f..6fc4ea23c 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -19,7 +19,7 @@ "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", "@getcanary/docusaurus-pagefind": "^0.0.11", - "@getcanary/web": "^0.0.53", + "@getcanary/web": "^0.0.54", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", diff --git a/docs/my-website/src/theme/SearchBar.js b/docs/my-website/src/theme/SearchBar.js new file mode 100644 index 000000000..09ae04630 --- /dev/null +++ b/docs/my-website/src/theme/SearchBar.js @@ -0,0 +1,84 @@ +import React from "react"; +import SearchBar from "@theme-original/SearchBar"; + +import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; +import { usePluginData } from "@docusaurus/useGlobalData"; + +export default function SearchBarWrapper(props) { + const { siteConfig } = useDocusaurusContext(); + const { options } = usePluginData("docusaurus-plugin-pagefind-canary"); + + const [path, setPath] = React.useState(""); + const [loaded, setLoaded] = React.useState(false); + + React.useEffect(() => { + setPath(`${siteConfig.baseUrl}pagefind/pagefind.js`); + }, [siteConfig]); + + React.useEffect(() => { + Promise.all([ + import("@getcanary/web/components/canary-root"), + import("@getcanary/web/components/canary-provider-pagefind"), + import("@getcanary/web/components/canary-modal"), + import("@getcanary/web/components/canary-trigger-logo"), + import("@getcanary/web/components/canary-content"), + import("@getcanary/web/components/canary-search"), + import("@getcanary/web/components/canary-search-input"), + import("@getcanary/web/components/canary-search-results"), + import("@getcanary/web/components/canary-footer"), + import("@getcanary/web/components/canary-callout-calendly"), + import("@getcanary/web/components/canary-callout-discord"), + ]) + .then(() => setLoaded(true)) + .catch(console.error); + }, []); + + if (!loaded || !path) { + return null; + } + + return ( +
+ + + + + + + + + + + + + + + + + + +
+ ); +} diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index c00d4c511..7e0b432a5 100644 --- a/docs/my-website/yarn.lock +++ b/docs/my-website/yarn.lock @@ -1970,10 +1970,10 @@ micromatch "^4.0.7" pagefind "^1.1.0" -"@getcanary/web@^0.0.53": - version "0.0.53" - resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.53.tgz#f842b054465bca484e5f0df184e12248106a1234" - integrity sha512-/vlzQJlF5J6smAFnND+pu91IN26w3sbLh8IA0YaKC/MQJr8ubh74vEgLS5WrwwjBUCEXYweQdPIOz2cbA/64Zw== +"@getcanary/web@^0.0.54": + version "0.0.54" + resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.54.tgz#a80d3a93b79beae3216e28391f35da32cac011c1" + integrity sha512-6ghmuusVq7pWNMj3SonRJ9Ncn0Yz2GxdT0pb7LLUJRdQWyxeP5UmnrhQ3jpq4NKzSqaIb8nK4M61Wikfbyr24Q== dependencies: "@floating-ui/dom" "^1.6.8" "@lit-labs/observers" "^2.0.2" From 8a45abb563b377cc1d6d3c7f0760a44b66d3b7cc Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 22:00:48 +0900 Subject: [PATCH 069/655] fix import and add fallback --- docs/my-website/src/theme/SearchBar.js | 72 +++++++++++++------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/docs/my-website/src/theme/SearchBar.js b/docs/my-website/src/theme/SearchBar.js index 09ae04630..6e2014f3c 100644 --- a/docs/my-website/src/theme/SearchBar.js +++ b/docs/my-website/src/theme/SearchBar.js @@ -24,7 +24,7 @@ export default function SearchBarWrapper(props) { import("@getcanary/web/components/canary-content"), import("@getcanary/web/components/canary-search"), import("@getcanary/web/components/canary-search-input"), - import("@getcanary/web/components/canary-search-results"), + import("@getcanary/web/components/canary-search-results-group"), import("@getcanary/web/components/canary-footer"), import("@getcanary/web/components/canary-callout-calendly"), import("@getcanary/web/components/canary-callout-discord"), @@ -33,10 +33,6 @@ export default function SearchBarWrapper(props) { .catch(console.error); }, []); - if (!loaded || !path) { - return null; - } - return (
- - - - - - - - - - - - - - - - + {!loaded || !path ? ( + 🐤 + ) : ( + + + + + + + + + + + + + + + + + )}
From 10ffb5a960ba278f63265ba30edaedfdd5bb290b Mon Sep 17 00:00:00 2001 From: yujonglee Date: Fri, 26 Jul 2024 22:13:04 +0900 Subject: [PATCH 070/655] remove ui shift on reload --- docs/my-website/src/theme/SearchBar.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/my-website/src/theme/SearchBar.js b/docs/my-website/src/theme/SearchBar.js index 6e2014f3c..66c8c2b5c 100644 --- a/docs/my-website/src/theme/SearchBar.js +++ b/docs/my-website/src/theme/SearchBar.js @@ -43,7 +43,18 @@ export default function SearchBarWrapper(props) { }} > {!loaded || !path ? ( - 🐤 + ) : ( Date: Fri, 26 Jul 2024 08:38:08 -0700 Subject: [PATCH 071/655] feat(proxy_server.py): handle pydantic mockselvar error Fixes https://github.com/BerriAI/litellm/issues/4898#issuecomment-2252105485 --- litellm/proxy/proxy_server.py | 6 ++-- litellm/tests/test_pydantic.py | 64 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 litellm/tests/test_pydantic.py diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e75f99f31..1f35a06f0 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2371,13 +2371,15 @@ async def async_data_generator( try: start_time = time.time() async for chunk in response: - + verbose_proxy_logger.debug( + "async_data_generator: received streaming chunk - {}".format(chunk) + ) ### CALL HOOKS ### - modify outgoing data chunk = await proxy_logging_obj.async_post_call_streaming_hook( user_api_key_dict=user_api_key_dict, response=chunk ) - chunk = chunk.model_dump_json(exclude_none=True) + chunk = chunk.model_dump_json(exclude_none=True, exclude_unset=True) try: yield f"data: {chunk}\n\n" except Exception as e: diff --git a/litellm/tests/test_pydantic.py b/litellm/tests/test_pydantic.py new file mode 100644 index 000000000..8b4105440 --- /dev/null +++ b/litellm/tests/test_pydantic.py @@ -0,0 +1,64 @@ +import os +import sys +import traceback + +from dotenv import load_dotenv + +load_dotenv() +import io +import os + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import asyncio +import json +import os +import tempfile +from unittest.mock import MagicMock, patch + +import pytest + +import litellm +from litellm.types.utils import ( + ChatCompletionTokenLogprob, + ChoiceLogprobs, + Delta, + ModelResponse, + StreamingChoices, + TopLogprob, +) + +obj = ModelResponse( + id="chat-f9bad6ec3c1146e99368682a0e7403fc", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta(content="", role=None, function_call=None, tool_calls=None), + logprobs=ChoiceLogprobs( + content=[ + ChatCompletionTokenLogprob( + token="", + bytes=[], + logprob=-0.00018153927521780133, + top_logprobs=[ + TopLogprob( + token="", bytes=[], logprob=-0.00018153927521780133 + ), + TopLogprob( + token="\n\n", bytes=[10, 10], logprob=-9.062681198120117 + ), + ], + ) + ] + ), + ) + ], + created=1721976759, + model="Meta-Llama-3-8B-Instruct", + object="chat.completion.chunk", + system_fingerprint=None, +) + +print(obj.model_dump()) From 84482703b8b1473cbd72ccbbea2f6d49ec50d5df Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 08:59:53 -0700 Subject: [PATCH 072/655] docs(config.md): update wildcard docs --- docs/my-website/docs/proxy/configs.md | 2 +- litellm/proxy/_new_secret_config.yaml | 4 ++-- litellm/router.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/my-website/docs/proxy/configs.md 
b/docs/my-website/docs/proxy/configs.md index cb0841c60..424ef8615 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -295,7 +295,7 @@ Dynamically call any model from any given provider without the need to predefine model_list: - model_name: "*" # all requests where model not in your config go to this deployment litellm_params: - model: "openai/*" # passes our validation check that a real provider is given + model: "*" # passes our validation check that a real provider is given ``` 2. Start LiteLLM proxy diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index f4a89cc3a..deec60b43 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,4 +1,4 @@ model_list: - - model_name: "gpt-3.5-turbo" + - model_name: "*" litellm_params: - model: "openai/gpt-3.5-turbo" + model: "*" diff --git a/litellm/router.py b/litellm/router.py index d1198aa15..eff5f94db 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2937,8 +2937,8 @@ class Router: model_group = kwargs["litellm_params"]["metadata"].get( "model_group", None ) - - id = kwargs["litellm_params"].get("model_info", {}).get("id", None) + model_info = kwargs["litellm_params"].get("model_info", {}) or {} + id = model_info.get("id", None) if model_group is None or id is None: return elif isinstance(id, int): From 7ca29d987de3222f71dfc3214dbda63d34fd0e25 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 10:07:47 -0700 Subject: [PATCH 073/655] docs(docusaurus.config.js): add llm model cost map to docs --- docs/my-website/docusaurus.config.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index 0ae35686c..03473c12b 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -135,6 +135,11 @@ const config = { label: '🚀 Hosted', to: "docs/hosted" }, + { + href: 'https://contextlengthof.com/', + label: '💸 LLM Model Cost Map', + position: 'right', + }, { href: 'https://github.com/BerriAI/litellm', label: 'GitHub', From 9a6ed8cabb5b15410b1003dce8bfae2715e39fdf Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 10:41:52 -0700 Subject: [PATCH 074/655] fix(bedrock_httpx.py): fix streaming error message Fixes https://github.com/BerriAI/litellm/issues/4900 --- litellm/llms/bedrock_httpx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index fbb51fb93..644d850a8 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -245,7 +245,7 @@ async def make_call( return completion_stream except httpx.HTTPStatusError as err: error_code = err.response.status_code - raise BedrockError(status_code=error_code, message=str(err)) + raise BedrockError(status_code=error_code, message=err.response.text) except httpx.TimeoutException as e: raise BedrockError(status_code=408, message="Timeout error occurred.") except Exception as e: From b515d4f441ce8953bb6b420671ffbc2824f4eb0d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 10:51:17 -0700 Subject: [PATCH 075/655] docs(stream.md): add streaming token usage info to docs Closes https://github.com/BerriAI/litellm/issues/4904 --- docs/my-website/src/pages/stream.md | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/my-website/src/pages/stream.md b/docs/my-website/src/pages/stream.md index 
5e8cc32ca..a524f4ba6 100644 --- a/docs/my-website/src/pages/stream.md +++ b/docs/my-website/src/pages/stream.md @@ -30,4 +30,48 @@ async def test_get_response(): response = asyncio.run(test_get_response()) print(response) +``` + +## Streaming Token Usage + +Supported across all providers. Works the same as openai. + +`stream_options={"include_usage": True}` + +If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value. + +### SDK +```python +from litellm import completion +import os + +os.environ["OPENAI_API_KEY"] = "" + +response = completion(model="gpt-3.5-turbo", messages=messages, stream=True, stream_options={"include_usage": True}) +for chunk in response: + print(chunk['choices'][0]['delta']) +``` + +### PROXY + +```bash +curl https://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ], + "stream": true, + "stream_options": {"include_usage": true} + }' + ``` \ No newline at end of file From 9943c6d6079974b1f773674236d4ce86d2fd3d9e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 13:33:02 -0700 Subject: [PATCH 076/655] fix(proxy_server.py): fix get secret for environment_variables --- litellm/proxy/proxy_server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 1f35a06f0..63f677ff1 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1387,7 +1387,9 @@ class ProxyConfig: environment_variables = config.get("environment_variables", None) if environment_variables: for key, value in environment_variables.items(): - os.environ[key] = value + os.environ[key] = str( + litellm.get_secret(secret_name=key, default_value=value) + ) ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..) litellm_settings = config.get("litellm_settings", None) From 9b89280a90c4cf24d4e919ed211ea0cd42a55683 Mon Sep 17 00:00:00 2001 From: Idris Mokhtarzada Date: Fri, 26 Jul 2024 16:38:54 -0400 Subject: [PATCH 077/655] Use underscores Datadog does not play nice with special characters (as in "(seconds)"). Also just makes sense to standardize on either underscores or camelCase, but not mix-and-match. 
--- litellm/integrations/datadog.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index d835b3d67..98c71597d 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -91,12 +91,12 @@ class DataDogLogger: "id": id, "call_type": call_type, "cache_hit": cache_hit, - "startTime": start_time, - "endTime": end_time, - "responseTime (seconds)": response_time, + "start_time": start_time, + "end_time": end_time, + "response_time": response_time, "model": kwargs.get("model", ""), "user": kwargs.get("user", ""), - "modelParameters": optional_params, + "model_parameters": optional_params, "spend": kwargs.get("response_cost", 0), "messages": messages, "response": response_obj, From a7e877d15fc98c4ecb56cfa08fab8737faed7256 Mon Sep 17 00:00:00 2001 From: Idris Mokhtarzada Date: Fri, 26 Jul 2024 16:43:21 -0400 Subject: [PATCH 078/655] Use milliseconds for response_time in Datadog logs milliseconds is more commonly used and more standard than seconds --- litellm/integrations/datadog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index 98c71597d..01b8c81e0 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -61,7 +61,7 @@ class DataDogLogger: id = response_obj.get("id", str(uuid.uuid4())) usage = dict(usage) try: - response_time = (end_time - start_time).total_seconds() + response_time = (end_time - start_time).total_seconds() * 1000 except: response_time = None From e8d4234dbda96ff01e0c5e1a639e97c9b0b667e0 Mon Sep 17 00:00:00 2001 From: Idris Mokhtarzada Date: Fri, 26 Jul 2024 17:02:05 -0400 Subject: [PATCH 079/655] Better JSON serialization for Datadog logs Dicts are now properly serialized to JSON so that Datadog can parse the child attributes. Also, numbers and nulls are sent as numbers and nulls instead of strings. 
--- litellm/integrations/datadog.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index 01b8c81e0..97ad96a5d 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -9,6 +9,20 @@ import litellm, uuid from litellm._logging import print_verbose, verbose_logger +def make_json_serializable(payload): + for key, value in payload.items(): + try: + if isinstance(value, dict): + # recursively sanitize dicts + payload[key] = make_json_serializable(value.copy()) + if not isinstance(value, (str, int, float, bool, type(None))): + # everything else becomes a string + payload[key] = str(value) + except: + # non blocking if it can't cast to a str + pass + + class DataDogLogger: # Class variables or attributes def __init__( @@ -104,13 +118,7 @@ class DataDogLogger: "metadata": clean_metadata, } - # Ensure everything in the payload is converted to str - for key, value in payload.items(): - try: - payload[key] = str(value) - except: - # non blocking if it can't cast to a str - pass + make_json_serializable(payload) import json payload = json.dumps(payload) From c4e4b4675cb8242edc9aa2ffbad66479fd935ac8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 17:35:08 -0700 Subject: [PATCH 080/655] fix raise better error when crossing tpm / rpm limits --- .../proxy/hooks/parallel_request_limiter.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 89b7059de..fe9eaaee0 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -1,7 +1,7 @@ import sys import traceback from datetime import datetime, timedelta -from typing import Optional +from typing import Literal, Optional from fastapi import HTTPException @@ -37,6 +37,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): tpm_limit: int, rpm_limit: int, request_count_api_key: str, + rate_limit_type: Literal["user", "customer", "team"], ): current = await self.internal_usage_cache.async_get_cache( key=request_count_api_key @@ -44,7 +45,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): if current is None: if max_parallel_requests == 0 or tpm_limit == 0 or rpm_limit == 0: # base case - return self.raise_rate_limit_error() + return self.raise_rate_limit_error( + additional_details=f"Hit limit for {rate_limit_type}. Current limits: max_parallel_requests: {max_parallel_requests}, tpm_limit: {tpm_limit}, rpm_limit: {rpm_limit}" + ) new_val = { "current_requests": 1, "current_tpm": 0, @@ -70,7 +73,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): else: raise HTTPException( status_code=429, - detail=f"LiteLLM Rate Limit Handler: Crossed TPM, RPM Limit. current rpm: {current['current_rpm']}, rpm limit: {rpm_limit}, current tpm: {current['current_tpm']}, tpm limit: {tpm_limit}", + detail=f"LiteLLM Rate Limit Handler for rate limit type = {rate_limit_type}. Crossed TPM, RPM Limit. 
current rpm: {current['current_rpm']}, rpm limit: {rpm_limit}, current tpm: {current['current_tpm']}, tpm limit: {tpm_limit}", headers={"retry-after": str(self.time_to_next_minute())}, ) @@ -86,10 +89,18 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): return seconds_to_next_minute - def raise_rate_limit_error(self) -> HTTPException: + def raise_rate_limit_error( + self, additional_details: Optional[str] = None + ) -> HTTPException: + """ + Raise an HTTPException with a 429 status code and a retry-after header + """ + error_message = "Max parallel request limit reached" + if additional_details is not None: + error_message = error_message + " " + additional_details raise HTTPException( status_code=429, - detail="Max parallel request limit reached.", + detail=f"Max parallel request limit reached {additional_details}", headers={"retry-after": str(self.time_to_next_minute())}, ) @@ -130,7 +141,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): current_global_requests = 1 # if above -> raise error if current_global_requests >= global_max_parallel_requests: - return self.raise_rate_limit_error() + return self.raise_rate_limit_error( + additional_details=f"Hit Global Limit: Limit={global_max_parallel_requests}, current: {current_global_requests}" + ) # if below -> increment else: await self.internal_usage_cache.async_increment_cache( @@ -158,7 +171,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): ): pass elif max_parallel_requests == 0 or tpm_limit == 0 or rpm_limit == 0: - return self.raise_rate_limit_error() + return self.raise_rate_limit_error( + additional_details=f"Hit limit for api_key: {api_key}. max_parallel_requests: {max_parallel_requests}, tpm_limit: {tpm_limit}, rpm_limit: {rpm_limit}" + ) elif current is None: new_val = { "current_requests": 1, @@ -183,7 +198,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): request_count_api_key, new_val ) else: - return self.raise_rate_limit_error() + return self.raise_rate_limit_error( + additional_details=f"Hit limit for api_key: {api_key}. 
tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} " + ) # check if REQUEST ALLOWED for user_id user_id = user_api_key_dict.user_id @@ -215,6 +232,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): request_count_api_key=request_count_api_key, tpm_limit=user_tpm_limit, rpm_limit=user_rpm_limit, + rate_limit_type="user", ) # TEAM RATE LIMITS @@ -242,6 +260,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): request_count_api_key=request_count_api_key, tpm_limit=team_tpm_limit, rpm_limit=team_rpm_limit, + rate_limit_type="team", ) # End-User Rate Limits @@ -274,6 +293,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): request_count_api_key=request_count_api_key, tpm_limit=end_user_tpm_limit, rpm_limit=end_user_rpm_limit, + rate_limit_type="customer", ) return From 159a880dcc5ba13cbd5c8384505d5985e7bd2ea3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:06:00 -0700 Subject: [PATCH 081/655] fix /v1/batches POST --- litellm/proxy/proxy_server.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 1f35a06f0..1ec2b3814 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -4808,10 +4808,18 @@ async def create_batch( """ global proxy_logging_obj data: Dict = {} + try: - # Use orjson to parse JSON data, orjson speeds up requests significantly - form_data = await request.form() - data = {key: value for key, value in form_data.items() if key != "file"} + body = await request.body() + body_str = body.decode() + try: + data = ast.literal_eval(body_str) + except: + data = json.loads(body_str) + + verbose_proxy_logger.debug( + "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)), + ) # Include original request and headers in the data data = await add_litellm_data_to_request( @@ -4915,10 +4923,18 @@ async def retrieve_batch( """ global proxy_logging_obj data: Dict = {} + data = {} try: - # Use orjson to parse JSON data, orjson speeds up requests significantly - form_data = await request.form() - data = {key: value for key, value in form_data.items() if key != "file"} + body = await request.body() + body_str = body.decode() + try: + data = ast.literal_eval(body_str) + except: + data = json.loads(body_str) + + verbose_proxy_logger.debug( + "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)), + ) # Include original request and headers in the data data = await add_litellm_data_to_request( From 56ce7e892d3ed3966d7f477e1cd441ff490fefa0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:08:54 -0700 Subject: [PATCH 082/655] fix batches inserting metadata --- litellm/proxy/litellm_pre_call_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 7384dc30b..ffea850a3 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -39,6 +39,8 @@ def _get_metadata_variable_name(request: Request) -> str: """ if "thread" in request.url.path or "assistant" in request.url.path: return "litellm_metadata" + if "batches" in request.url.path: + return "litellm_metadata" if "/v1/messages" in request.url.path: # anthropic API has a field called metadata return "litellm_metadata" From 12729ceece68db1888d67cf482511066e05cfb6d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 
18:09:49 -0700 Subject: [PATCH 083/655] test - batches endpoint --- tests/test_openai_batches_endpoint.py | 41 +++++++++++++++++++++++++++ tests/test_openai_files_endpoints.py | 4 +-- 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 tests/test_openai_batches_endpoint.py diff --git a/tests/test_openai_batches_endpoint.py b/tests/test_openai_batches_endpoint.py new file mode 100644 index 000000000..f996c7e8b --- /dev/null +++ b/tests/test_openai_batches_endpoint.py @@ -0,0 +1,41 @@ +# What this tests ? +## Tests /batches endpoints +import pytest +import asyncio +import aiohttp, openai +from openai import OpenAI, AsyncOpenAI +from typing import Optional, List, Union +from test_openai_files_endpoints import upload_file, delete_file + + +BASE_URL = "http://localhost:4000" # Replace with your actual base URL +API_KEY = "sk-1234" # Replace with your actual API key + + +async def create_batch(session, input_file_id, endpoint, completion_window): + url = f"{BASE_URL}/v1/batches" + headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"} + payload = { + "input_file_id": input_file_id, + "endpoint": endpoint, + "completion_window": completion_window, + } + + async with session.post(url, headers=headers, json=payload) as response: + assert response.status == 200, f"Expected status 200, got {response.status}" + result = await response.json() + print(f"Batch creation successful. Batch ID: {result.get('id', 'N/A')}") + return result + + +@pytest.mark.asyncio +async def test_file_operations(): + async with aiohttp.ClientSession() as session: + # Test file upload and get file_id + file_id = await upload_file(session, purpose="batch") + + batch_id = await create_batch(session, file_id, "/v1/chat/completions", "24h") + assert batch_id is not None + + # Test delete file + await delete_file(session, file_id) diff --git a/tests/test_openai_files_endpoints.py b/tests/test_openai_files_endpoints.py index d3922ab69..1444b8a70 100644 --- a/tests/test_openai_files_endpoints.py +++ b/tests/test_openai_files_endpoints.py @@ -30,11 +30,11 @@ async def test_file_operations(): await delete_file(session, file_id) -async def upload_file(session): +async def upload_file(session, purpose="fine-tune"): url = f"{BASE_URL}/v1/files" headers = {"Authorization": f"Bearer {API_KEY}"} data = aiohttp.FormData() - data.add_field("purpose", "fine-tune") + data.add_field("purpose", purpose) data.add_field( "file", b'{"prompt": "Hello", "completion": "Hi"}', filename="mydata.jsonl" ) From f627fa9b40c425377841539b5664d42c39d1a4a3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:23:15 -0700 Subject: [PATCH 084/655] fix for GET /v1/batches{batch_id:path} --- litellm/proxy/proxy_server.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 1ec2b3814..1bdbadd83 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -4891,12 +4891,12 @@ async def create_batch( @router.get( - "/v1/batches{batch_id}", + "/v1/batches{batch_id:path}", dependencies=[Depends(user_api_key_auth)], tags=["batch"], ) @router.get( - "/batches{batch_id}", + "/batches{batch_id:path}", dependencies=[Depends(user_api_key_auth)], tags=["batch"], ) @@ -4923,29 +4923,7 @@ async def retrieve_batch( """ global proxy_logging_obj data: Dict = {} - data = {} try: - body = await request.body() - body_str = body.decode() - try: - data = ast.literal_eval(body_str) - except: - data = 
json.loads(body_str) - - verbose_proxy_logger.debug( - "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)), - ) - - # Include original request and headers in the data - data = await add_litellm_data_to_request( - data=data, - request=request, - general_settings=general_settings, - user_api_key_dict=user_api_key_dict, - version=version, - proxy_config=proxy_config, - ) - _retrieve_batch_request = RetrieveBatchRequest( batch_id=batch_id, ) From 2541d5f6259514a9f40a41368cab152907849ff9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:26:39 -0700 Subject: [PATCH 085/655] add verbose_logger.debug to retrieve batch --- litellm/llms/openai.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index fae8a448a..94000233c 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -24,6 +24,7 @@ from pydantic import BaseModel from typing_extensions import overload, override import litellm +from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.types.utils import ProviderField from litellm.utils import ( @@ -2534,6 +2535,7 @@ class OpenAIBatchesAPI(BaseLLM): retrieve_batch_data: RetrieveBatchRequest, openai_client: AsyncOpenAI, ) -> Batch: + verbose_logger.debug("retrieving batch, args= %s", retrieve_batch_data) response = await openai_client.batches.retrieve(**retrieve_batch_data) return response From 812dd5e162ddcc5ee00793a48446241382b818e6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:40:10 -0700 Subject: [PATCH 086/655] test get batches by id --- tests/test_openai_batches_endpoint.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/test_openai_batches_endpoint.py b/tests/test_openai_batches_endpoint.py index f996c7e8b..75e3c3f88 100644 --- a/tests/test_openai_batches_endpoint.py +++ b/tests/test_openai_batches_endpoint.py @@ -28,14 +28,37 @@ async def create_batch(session, input_file_id, endpoint, completion_window): return result +async def get_batch_by_id(session, batch_id): + url = f"{BASE_URL}/v1/batches/{batch_id}" + headers = {"Authorization": f"Bearer {API_KEY}"} + + async with session.get(url, headers=headers) as response: + if response.status == 200: + result = await response.json() + return result + else: + print(f"Error: Failed to get batch. 
Status code: {response.status}") + return None + + @pytest.mark.asyncio -async def test_file_operations(): +async def test_batches_operations(): async with aiohttp.ClientSession() as session: # Test file upload and get file_id file_id = await upload_file(session, purpose="batch") - batch_id = await create_batch(session, file_id, "/v1/chat/completions", "24h") + create_batch_response = await create_batch( + session, file_id, "/v1/chat/completions", "24h" + ) + batch_id = create_batch_response.get("id") assert batch_id is not None + # Test get batch + get_batch_response = await get_batch_by_id(session, batch_id) + print("response from get batch", get_batch_response) + + assert get_batch_response["id"] == batch_id + assert get_batch_response["input_file_id"] == file_id + # Test delete file await delete_file(session, file_id) From f4048bc89055c18c8ce73856f8f6258d9d05c012 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:41:53 -0700 Subject: [PATCH 087/655] docs batches api --- docs/my-website/docs/batches.md | 81 +++++++++++++++++---------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md index 51f3bb5ca..6956a47be 100644 --- a/docs/my-website/docs/batches.md +++ b/docs/my-website/docs/batches.md @@ -18,6 +18,47 @@ Call an existing Assistant. + + +```bash +$ export OPENAI_API_KEY="sk-..." + +$ litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +**Create File for Batch Completion** + +```shell +curl https://api.openai.com/v1/files \ + -H "Authorization: Bearer sk-1234" \ + -F purpose="batch" \ + -F file="@mydata.jsonl" +``` + +**Create Batch Request** + +```bash +curl http://localhost:4000/v1/batches \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "input_file_id": "file-abc123", + "endpoint": "/v1/chat/completions", + "completion_window": "24h" + }' +``` + +**Retrieve the Specific Batch** + +```bash +curl http://localhost:4000/v1/batches/batch_abc123 \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ +``` + + **Create File for Batch Completion** @@ -78,47 +119,7 @@ print("file content = ", file_content) ``` - -```bash -$ export OPENAI_API_KEY="sk-..." 
- -$ litellm - -# RUNNING on http://0.0.0.0:4000 -``` - -**Create File for Batch Completion** - -```shell -curl https://api.openai.com/v1/files \ - -H "Authorization: Bearer sk-1234" \ - -F purpose="batch" \ - -F file="@mydata.jsonl" -``` - -**Create Batch Request** - -```bash -curl http://localhost:4000/v1/batches \ - -H "Authorization: Bearer sk-1234" \ - -H "Content-Type: application/json" \ - -d '{ - "input_file_id": "file-abc123", - "endpoint": "/v1/chat/completions", - "completion_window": "24h" - }' -``` - -**Retrieve the Specific Batch** - -```bash -curl http://localhost:4000/v1/batches/batch_abc123 \ - -H "Authorization: Bearer sk-1234" \ - -H "Content-Type: application/json" \ -``` - - ## [👉 Proxy API Reference](https://litellm-api.up.railway.app/#/batch) From dd37d1d032cc1a6091cae73c6e9a3af11ee3db09 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:42:45 -0700 Subject: [PATCH 088/655] use correct link on http://localhost:4000 --- docs/my-website/docs/batches.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md index 6956a47be..91cc86bb4 100644 --- a/docs/my-website/docs/batches.md +++ b/docs/my-website/docs/batches.md @@ -31,7 +31,7 @@ $ litellm **Create File for Batch Completion** ```shell -curl https://api.openai.com/v1/files \ +curl http://localhost:4000/v1/files \ -H "Authorization: Bearer sk-1234" \ -F purpose="batch" \ -F file="@mydata.jsonl" From 90648bee6082682932b02441d7942a3402755377 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:50:44 -0700 Subject: [PATCH 089/655] docs batches API --- docs/my-website/docs/batches.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md index 91cc86bb4..b5386a900 100644 --- a/docs/my-website/docs/batches.md +++ b/docs/my-website/docs/batches.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Batches API +# [BETA] Batches API Covers Batches, Files From f8b9c7128e48536468415cb9ae991acc10ced6db Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 18:51:13 -0700 Subject: [PATCH 090/655] docs batches --- docs/my-website/docs/batches.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md index b5386a900..2199e318f 100644 --- a/docs/my-website/docs/batches.md +++ b/docs/my-website/docs/batches.md @@ -8,8 +8,6 @@ Covers Batches, Files ## Quick Start -Call an existing Assistant. 
- - Create File for Batch Completion - Create Batch Request From fe0b55f2cac9290687134f2d3d6ddce2ce02cf41 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 19:03:42 -0700 Subject: [PATCH 091/655] fix(utils.py): fix cache hits for streaming Fixes https://github.com/BerriAI/litellm/issues/4109 --- litellm/integrations/opentelemetry.py | 2 +- litellm/litellm_core_utils/litellm_logging.py | 4 +- litellm/proxy/_new_secret_config.yaml | 4 ++ litellm/tests/test_proxy_server.py | 1 + litellm/utils.py | 47 +++++++++++++------ 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index c47911b4f..ef14ad679 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -463,7 +463,7 @@ class OpenTelemetry(CustomLogger): ############################################# # OTEL Attributes for the RAW Request to https://docs.anthropic.com/en/api/messages - if complete_input_dict: + if complete_input_dict and isinstance(complete_input_dict, dict): for param, val in complete_input_dict.items(): if not isinstance(val, str): val = str(val) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 0785933aa..85b6adc1e 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1220,7 +1220,9 @@ class Logging: """ Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. """ - print_verbose("Logging Details LiteLLM-Async Success Call") + print_verbose( + "Logging Details LiteLLM-Async Success Call, cache_hit={}".format(cache_hit) + ) start_time, end_time, result = self._success_handler_helper_fn( start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit ) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index deec60b43..c12847736 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -2,3 +2,7 @@ model_list: - model_name: "*" litellm_params: model: "*" + +litellm_settings: + success_callback: ["logfire"] + cache: true \ No newline at end of file diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index e088f2055..0e5431c3f 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -625,6 +625,7 @@ def test_chat_completion_optional_params(mock_acompletion, client_no_auth): # Run the test # test_chat_completion_optional_params() + # Test Reading config.yaml file from litellm.proxy.proxy_server import ProxyConfig diff --git a/litellm/utils.py b/litellm/utils.py index 7c22953bc..a8ef6119b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -10009,6 +10009,12 @@ class CustomStreamWrapper: return model_response def __next__(self): + cache_hit = False + if ( + self.custom_llm_provider is not None + and self.custom_llm_provider == "cached_response" + ): + cache_hit = True try: if self.completion_stream is None: self.fetch_sync_stream() @@ -10073,7 +10079,8 @@ class CustomStreamWrapper: response.usage = complete_streaming_response.usage # type: ignore ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(response,) + target=self.logging_obj.success_handler, + args=(response, None, None, cache_hit), ).start() # log response self.sent_stream_usage = True return response @@ -10083,7 +10090,8 @@ class CustomStreamWrapper: 
processed_chunk = self.finish_reason_handler() ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(processed_chunk,) + target=self.logging_obj.success_handler, + args=(processed_chunk, None, None, cache_hit), ).start() # log response return processed_chunk except Exception as e: @@ -10120,6 +10128,12 @@ class CustomStreamWrapper: return self.completion_stream async def __anext__(self): + cache_hit = False + if ( + self.custom_llm_provider is not None + and self.custom_llm_provider == "cached_response" + ): + cache_hit = True try: if self.completion_stream is None: await self.fetch_stream() @@ -10174,11 +10188,12 @@ class CustomStreamWrapper: continue ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(processed_chunk,) + target=self.logging_obj.success_handler, + args=(processed_chunk, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - processed_chunk, + processed_chunk, cache_hit=cache_hit ) ) self.response_uptil_now += ( @@ -10225,11 +10240,11 @@ class CustomStreamWrapper: ## LOGGING threading.Thread( target=self.logging_obj.success_handler, - args=(processed_chunk,), + args=(processed_chunk, None, None, cache_hit), ).start() # log processed_chunk asyncio.create_task( self.logging_obj.async_success_handler( - processed_chunk, + processed_chunk, cache_hit=cache_hit ) ) @@ -10257,11 +10272,12 @@ class CustomStreamWrapper: response.usage = complete_streaming_response.usage ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(response,) + target=self.logging_obj.success_handler, + args=(response, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - response, + response, cache_hit=cache_hit ) ) self.sent_stream_usage = True @@ -10272,11 +10288,12 @@ class CustomStreamWrapper: processed_chunk = self.finish_reason_handler() ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(processed_chunk,) + target=self.logging_obj.success_handler, + args=(processed_chunk, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - processed_chunk, + processed_chunk, cache_hit=cache_hit ) ) return processed_chunk @@ -10295,11 +10312,12 @@ class CustomStreamWrapper: response.usage = complete_streaming_response.usage ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(response,) + target=self.logging_obj.success_handler, + args=(response, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - response, + response, cache_hit=cache_hit ) ) self.sent_stream_usage = True @@ -10310,11 +10328,12 @@ class CustomStreamWrapper: processed_chunk = self.finish_reason_handler() ## LOGGING threading.Thread( - target=self.logging_obj.success_handler, args=(processed_chunk,) + target=self.logging_obj.success_handler, + args=(processed_chunk, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - processed_chunk, + processed_chunk, cache_hit=cache_hit ) ) return processed_chunk From fe7f78fbf6ca18963578ff07ac8858d2de71eae1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 20:50:18 -0700 Subject: [PATCH 092/655] feat(vertex_httpx.py): support logging vertex ai safety results to langfuse Closes https://github.com/BerriAI/litellm/issues/3230 --- litellm/litellm_core_utils/litellm_logging.py | 20 
+++++++++++++++++++ litellm/llms/vertex_httpx.py | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 85b6adc1e..852f1a2d9 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -529,6 +529,7 @@ class Logging: or isinstance(result, TextCompletionResponse) or isinstance(result, HttpxBinaryResponseContent) # tts ): + ## RESPONSE COST ## custom_pricing = use_custom_pricing_for_model( litellm_params=self.litellm_params ) @@ -548,6 +549,25 @@ class Logging: custom_pricing=custom_pricing, ) ) + + ## HIDDEN PARAMS ## + if hasattr(result, "_hidden_params"): + # add to metadata for logging + if self.model_call_details.get("litellm_params") is not None: + self.model_call_details["litellm_params"].setdefault( + "metadata", {} + ) + if ( + self.model_call_details["litellm_params"]["metadata"] + is None + ): + self.model_call_details["litellm_params"][ + "metadata" + ] = {} + + self.model_call_details["litellm_params"]["metadata"][ + "hidden_params" + ] = result._hidden_params else: # streaming chunks + image gen. self.model_call_details["response_cost"] = None diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index 93d8f4282..bd3367830 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -688,6 +688,7 @@ class VertexLLM(BaseLLM): try: ## CHECK IF GROUNDING METADATA IN REQUEST grounding_metadata: List[dict] = [] + safety_ratings: List = [] ## GET TEXT ## chat_completion_message = {"role": "assistant"} content_str = "" @@ -699,6 +700,8 @@ class VertexLLM(BaseLLM): if "groundingMetadata" in candidate: grounding_metadata.append(candidate["groundingMetadata"]) + if "safetyRatings" in candidate: + safety_ratings.append(candidate["safetyRatings"]) if "text" in candidate["content"]["parts"][0]: content_str = candidate["content"]["parts"][0]["text"] @@ -749,6 +752,10 @@ class VertexLLM(BaseLLM): model_response._hidden_params["vertex_ai_grounding_metadata"] = ( grounding_metadata ) + + ## ADD SAFETY RATINGS ## + model_response._hidden_params["vertex_ai_safety_results"] = safety_ratings + except Exception as e: raise VertexAIError( message="Received={}, Error converting to valid response block={}. 
File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format( From a264d1ca8cedd15c1494cb047b3376b0d68cf1db Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 20:54:59 -0700 Subject: [PATCH 093/655] feat(vertex_httpx.py): support logging citation metadata Closes https://github.com/BerriAI/litellm/issues/3230 --- litellm/llms/vertex_httpx.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index bd3367830..142cabbe9 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -689,6 +689,7 @@ class VertexLLM(BaseLLM): ## CHECK IF GROUNDING METADATA IN REQUEST grounding_metadata: List[dict] = [] safety_ratings: List = [] + citation_metadata: List = [] ## GET TEXT ## chat_completion_message = {"role": "assistant"} content_str = "" @@ -702,6 +703,9 @@ class VertexLLM(BaseLLM): if "safetyRatings" in candidate: safety_ratings.append(candidate["safetyRatings"]) + + if "citationMetadata" in candidate: + citation_metadata.append(candidate["citationMetadata"]) if "text" in candidate["content"]["parts"][0]: content_str = candidate["content"]["parts"][0]["text"] @@ -756,6 +760,11 @@ class VertexLLM(BaseLLM): ## ADD SAFETY RATINGS ## model_response._hidden_params["vertex_ai_safety_results"] = safety_ratings + ## ADD CITATION METADATA ## + model_response._hidden_params["vertex_ai_citation_metadata"] = ( + citation_metadata + ) + except Exception as e: raise VertexAIError( message="Received={}, Error converting to valid response block={}. File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format( From 548adea8cf11b36fe5dfa0b80f9dd0f46db6de04 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 21:04:31 -0700 Subject: [PATCH 094/655] add litellm_header_name endpoint --- .../key_management_endpoints.py | 19 ++ .../src/components/networking.tsx | 176 ++++++++++-------- 2 files changed, 120 insertions(+), 75 deletions(-) diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 0e4696e44..824c40de1 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -914,3 +914,22 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None) verbose_proxy_logger.debug(traceback.format_exc()) raise e return deleted_tokens + + +@router.get( + "/litellm_header_name", + include_in_schema=False, +) +async def get_litellm_header_name(): + """ + Used by LiteLLM Admin UI + + returns the header name that should be used for the Authorization header on requests to litellm + """ + from litellm.proxy.proxy_server import general_settings + + if "litellm_key_header_name" in general_settings: + return {"litellm_key_header_name": general_settings["litellm_key_header_name"]} + else: + # default value + return {"litellm_key_header_name": "Authorization"} diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx index f31e26d4d..8527b39a2 100644 --- a/ui/litellm-dashboard/src/components/networking.tsx +++ b/ui/litellm-dashboard/src/components/networking.tsx @@ -36,6 +36,32 @@ const handleError = async (errorData: string) => { } }; + +export const getLiteLLMHeaderName = async () => { + try { + const url = proxyBaseUrl ? 
`${proxyBaseUrl}/litellm_header_name` : '/litellm_header_name'; + const response = await fetch(url, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const data = await response.json(); + console.log('Received LiteLLM header name:', data); + return data.litellm_key_header_name; + } catch (error) { + console.error('Failed to get LiteLLM header name:', error); + throw error; + } +}; + +const litellm_key_header_name = getLiteLLMHeaderName(); + export const modelCostMap = async ( accessToken: string, ) => { @@ -45,7 +71,7 @@ export const modelCostMap = async ( url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, } @@ -68,7 +94,7 @@ export const modelCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -107,7 +133,7 @@ export const modelSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -138,7 +164,7 @@ export const modelDeleteCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -180,7 +206,7 @@ export const budgetDeleteCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -215,7 +241,7 @@ export const budgetCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -251,7 +277,7 @@ export const invitationCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -290,7 +316,7 @@ export const invitationClaimCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -328,7 +354,7 @@ export const alertingSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -385,7 +411,7 @@ export const keyCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -447,7 +473,7 @@ export const userCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + 
litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -481,7 +507,7 @@ export const keyDeleteCall = async (accessToken: String, user_key: String) => { const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -513,7 +539,7 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => { const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -566,7 +592,7 @@ export const userInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -601,7 +627,7 @@ export const teamInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -633,7 +659,7 @@ export const getTotalSpendCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -698,7 +724,7 @@ export const claimOnboardingToken = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -740,7 +766,7 @@ export const modelInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -788,7 +814,7 @@ export const modelHubCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -819,7 +845,7 @@ export const getAllowedIPs = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -848,7 +874,7 @@ export const addAllowedIP = async (accessToken: String, ip: String) => { const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ ip: ip }), @@ -878,7 +904,7 @@ export const deleteAllowedIP = async (accessToken: String, ip: String) => { const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ ip: ip }), @@ -920,7 +946,7 @@ export const modelMetricsCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ 
-959,7 +985,7 @@ export const streamingModelMetricsCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1004,7 +1030,7 @@ export const modelMetricsSlowResponsesCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1048,7 +1074,7 @@ export const modelExceptionsCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1083,7 +1109,7 @@ export const modelAvailableCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1113,7 +1139,7 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => { const response = await fetch(`${url}?api_key=${token}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1141,7 +1167,7 @@ export const teamSpendLogsCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1184,7 +1210,7 @@ export const tagsSpendLogsCall = async ( const response = await fetch(`${url}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1212,7 +1238,7 @@ export const allTagNamesCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1240,7 +1266,7 @@ export const allEndUsersCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1278,7 +1304,7 @@ export const userSpendLogsCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1308,7 +1334,7 @@ export const adminSpendLogsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1338,7 +1364,7 @@ export const adminTopKeysCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1386,14 +1412,14 @@ export const adminTopEndUsersCall = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; 
"Content-Type": string; }; body?: string; // The body is optional and might not be present } = { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }; @@ -1439,12 +1465,12 @@ export const adminspendByProvider = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1482,12 +1508,12 @@ export const adminGlobalActivity = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1523,12 +1549,12 @@ export const adminGlobalCacheActivity = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1564,12 +1590,12 @@ export const adminGlobalActivityPerModel = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1610,12 +1636,12 @@ export const adminGlobalActivityExceptions = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1656,12 +1682,12 @@ export const adminGlobalActivityExceptionsPerDeployment = async ( const requestOptions: { method: string; headers: { - Authorization: string; + litellm_key_header_name: string; }; } = { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, }, }; @@ -1690,7 +1716,7 @@ export const adminTopModelsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1717,7 +1743,7 @@ export const keyInfoCall = async (accessToken: String, keys: String[]) => { const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1747,7 +1773,7 @@ export const spendUsersCall = async (accessToken: String, userID: String) => { const response = await fetch(`${url}?user_id=${userID}`, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1779,7 +1805,7 @@ export const userRequestModelCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1814,7 +1840,7 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: 
{ - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1854,7 +1880,7 @@ export const userGetAllUsersCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1883,7 +1909,7 @@ export const getPossibleUserRoles = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1912,7 +1938,7 @@ export const teamCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1948,7 +1974,7 @@ export const keyUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1983,7 +2009,7 @@ export const teamUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2018,7 +2044,7 @@ export const modelUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2062,7 +2088,7 @@ export const teamMemberAddCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2105,7 +2131,7 @@ export const userUpdateUserCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: response_body, @@ -2143,7 +2169,7 @@ export const PredictedSpendLogsCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2179,7 +2205,7 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2218,7 +2244,7 @@ export const serviceHealthCheck = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2253,7 +2279,7 @@ export const getBudgetList = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2286,7 +2312,7 @@ export const 
getBudgetSettings = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2324,7 +2350,7 @@ export const getCallbacksCall = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2355,7 +2381,7 @@ export const getGeneralSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2389,7 +2415,7 @@ export const getConfigFieldSetting = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2427,7 +2453,7 @@ export const updateConfigFieldSetting = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify(formData), @@ -2467,7 +2493,7 @@ export const deleteConfigFieldSetting = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify(formData), @@ -2502,7 +2528,7 @@ export const setCallbacksCall = async ( const response = await fetch(url, { method: "POST", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2537,7 +2563,7 @@ export const healthCheckCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2573,7 +2599,7 @@ export const getProxyBaseUrlAndLogoutUrl = async ( const response = await fetch(url, { method: "GET", headers: { - Authorization: `Bearer ${accessToken}`, + litellm_key_header_name: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); From 2501b4eccd736412d802261c7a076a60610e54f7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 21:34:42 -0700 Subject: [PATCH 095/655] feat link to model cost map on swagger --- litellm/proxy/proxy_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 1bdbadd83..b393b2a74 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -279,6 +279,8 @@ ui_link = f"/ui/" ui_message = ( f"👉 [```LiteLLM Admin Panel on /ui```]({ui_link}). Create, Edit Keys with SSO" ) +ui_message += f"\n\n💸 [```LiteLLM Model Cost Map```](https://models.litellm.ai/)." 
+ custom_swagger_message = f"[**Customize Swagger Docs**](https://docs.litellm.ai/docs/proxy/enterprise#swagger-docs---custom-routes--branding)" ### CUSTOM BRANDING [ENTERPRISE FEATURE] ### From f03769e2a41de4f27d5ef451fa820831c91f5725 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 21:35:54 -0700 Subject: [PATCH 096/655] docs fix link https://models.litellm.ai/ --- docs/my-website/docusaurus.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index 03473c12b..33ac8cd84 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -136,7 +136,7 @@ const config = { to: "docs/hosted" }, { - href: 'https://contextlengthof.com/', + href: 'https://models.litellm.ai/', label: '💸 LLM Model Cost Map', position: 'right', }, From 1506e74332fa6c8e80a7155f6bb0a9ac16c871e1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 26 Jul 2024 21:37:33 -0700 Subject: [PATCH 097/655] =?UTF-8?q?bump:=20version=201.42.3=20=E2=86=92=20?= =?UTF-8?q?1.42.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3bc808e7e..6e7f43608 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.3" +version = "1.42.4" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.3" +version = "1.42.4" version_files = [ "pyproject.toml:^version" ] From b25d4a8cb32d50dae07e7c994de9306123890974 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 21:51:54 -0700 Subject: [PATCH 098/655] feat(ollama_chat.py): support ollama tool calling Closes https://github.com/BerriAI/litellm/issues/4812 --- litellm/llms/ollama_chat.py | 49 +++++++++++++++++---------- litellm/proxy/_new_secret_config.yaml | 10 +++--- litellm/router.py | 12 +++++++ litellm/types/utils.py | 1 + litellm/utils.py | 10 +++++- 5 files changed, 57 insertions(+), 25 deletions(-) diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index ebd0f22fb..a6b975026 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -149,7 +149,9 @@ class OllamaChatConfig: "response_format", ] - def map_openai_params(self, non_default_params: dict, optional_params: dict): + def map_openai_params( + self, model: str, non_default_params: dict, optional_params: dict + ): for param, value in non_default_params.items(): if param == "max_tokens": optional_params["num_predict"] = value @@ -170,16 +172,26 @@ class OllamaChatConfig: ### FUNCTION CALLING LOGIC ### if param == "tools": # ollama actually supports json output - optional_params["format"] = "json" - litellm.add_function_to_prompt = ( - True # so that main.py adds the function call to the prompt - ) - optional_params["functions_unsupported_model"] = value + ## CHECK IF MODEL SUPPORTS TOOL CALLING ## + try: + model_info = litellm.get_model_info( + model=model, custom_llm_provider="ollama_chat" + ) + if model_info.get("supports_function_calling") is True: + optional_params["tools"] = value + else: + raise Exception + except Exception: + optional_params["format"] = "json" + litellm.add_function_to_prompt = ( + True # so that main.py adds the 
function call to the prompt + ) + optional_params["functions_unsupported_model"] = value - if len(optional_params["functions_unsupported_model"]) == 1: - optional_params["function_name"] = optional_params[ - "functions_unsupported_model" - ][0]["function"]["name"] + if len(optional_params["functions_unsupported_model"]) == 1: + optional_params["function_name"] = optional_params[ + "functions_unsupported_model" + ][0]["function"]["name"] if param == "functions": # ollama actually supports json output @@ -198,11 +210,11 @@ class OllamaChatConfig: # ollama implementation def get_ollama_response( model_response: litellm.ModelResponse, + messages: list, + optional_params: dict, api_base="http://localhost:11434", api_key: Optional[str] = None, model="llama2", - messages=None, - optional_params=None, logging_obj=None, acompletion: bool = False, encoding=None, @@ -223,6 +235,7 @@ def get_ollama_response( stream = optional_params.pop("stream", False) format = optional_params.pop("format", None) function_name = optional_params.pop("function_name", None) + tools = optional_params.pop("tools", None) for m in messages: if "role" in m and m["role"] == "tool": @@ -236,6 +249,8 @@ def get_ollama_response( } if format is not None: data["format"] = format + if tools is not None: + data["tools"] = tools ## LOGGING logging_obj.pre_call( input=None, @@ -499,7 +514,8 @@ async def ollama_acompletion( ## RESPONSE OBJECT model_response.choices[0].finish_reason = "stop" - if data.get("format", "") == "json": + + if data.get("format", "") == "json" and function_name is not None: function_call = json.loads(response_json["message"]["content"]) message = litellm.Message( content=None, @@ -519,11 +535,8 @@ async def ollama_acompletion( model_response.choices[0].message = message # type: ignore model_response.choices[0].finish_reason = "tool_calls" else: - model_response.choices[0].message.content = response_json[ # type: ignore - "message" - ][ - "content" - ] + _message = litellm.Message(**response_json["message"]) + model_response.choices[0].message = _message # type: ignore model_response.created = int(time.time()) model_response.model = "ollama_chat/" + data["model"] diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index c12847736..240c3d436 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,8 +1,6 @@ model_list: - - model_name: "*" + - model_name: "mistral" litellm_params: - model: "*" - -litellm_settings: - success_callback: ["logfire"] - cache: true \ No newline at end of file + model: "ollama_chat/llama3.1" + model_info: + supports_function_calling: true \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index eff5f94db..d72f3ea5e 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3469,6 +3469,18 @@ class Router: model_info=_model_info, ) + ## REGISTER MODEL INFO IN LITELLM MODEL COST MAP + _model_name = deployment.litellm_params.model + if deployment.litellm_params.custom_llm_provider is not None: + _model_name = ( + deployment.litellm_params.custom_llm_provider + "/" + _model_name + ) + litellm.register_model( + model_cost={ + _model_name: _model_info, + } + ) + deployment = self._add_deployment(deployment=deployment) model = deployment.to_json(exclude_none=True) diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 88bfa19e9..e64099aa6 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -74,6 +74,7 @@ class ModelInfo(TypedDict, total=False): 
supports_system_messages: Optional[bool] supports_response_schema: Optional[bool] supports_vision: Optional[bool] + supports_function_calling: Optional[bool] class GenericStreamingChunk(TypedDict): diff --git a/litellm/utils.py b/litellm/utils.py index a8ef6119b..358904677 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2089,6 +2089,7 @@ def supports_function_calling(model: str) -> bool: Raises: Exception: If the given model is not found in model_prices_and_context_window.json. """ + if model in litellm.model_cost: model_info = litellm.model_cost[model] if model_info.get("supports_function_calling", False) is True: @@ -3293,7 +3294,9 @@ def get_optional_params( _check_valid_arg(supported_params=supported_params) optional_params = litellm.OllamaChatConfig().map_openai_params( - non_default_params=non_default_params, optional_params=optional_params + model=model, + non_default_params=non_default_params, + optional_params=optional_params, ) elif custom_llm_provider == "nlp_cloud": supported_params = get_supported_openai_params( @@ -4877,6 +4880,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod supports_system_messages: Optional[bool] supports_response_schema: Optional[bool] supports_vision: Optional[bool] + supports_function_calling: Optional[bool] Raises: Exception: If the model is not mapped yet. @@ -4951,6 +4955,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod supported_openai_params=supported_openai_params, supports_system_messages=None, supports_response_schema=None, + supports_function_calling=None, ) else: """ @@ -5041,6 +5046,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod "supports_response_schema", None ), supports_vision=_model_info.get("supports_vision", False), + supports_function_calling=_model_info.get( + "supports_function_calling", False + ), ) except Exception: raise Exception( From 77fe8f57cf04b8235527efe756b418c57d98abd1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 22:12:52 -0700 Subject: [PATCH 099/655] docs(ollama.md): add ollama tool calling to docs --- docs/my-website/docs/providers/ollama.md | 117 ++++++++++++++++++ ...odel_prices_and_context_window_backup.json | 10 ++ litellm/proxy/_new_secret_config.yaml | 2 +- model_prices_and_context_window.json | 10 ++ 4 files changed, 138 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md index c1c8fc57c..63b79fe3a 100644 --- a/docs/my-website/docs/providers/ollama.md +++ b/docs/my-website/docs/providers/ollama.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Ollama LiteLLM supports all models from [Ollama](https://github.com/ollama/ollama) @@ -84,6 +87,120 @@ response = completion( ) ``` +## Example Usage - Tool Calling + +To use ollama tool calling, pass `tools=[{..}]` to `litellm.completion()` + + + + +```python +from litellm import completion +import litellm + +## [OPTIONAL] REGISTER MODEL - not all ollama models support function calling, litellm defaults to json mode tool calls if native tool calling not supported. 
+ +# litellm.register_model(model_cost={ +# "ollama_chat/llama3.1": { +# "supports_function_calling": true +# }, +# }) + +tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + } + } +] + +messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] + + +response = completion( + model="ollama_chat/llama3.1", + messages=messages, + tools=tools +) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: "llama3.1" + litellm_params: + model: "ollama_chat/llama3.1" + model_info: + supports_function_calling: true +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "llama3.1", + "messages": [ + { + "role": "user", + "content": "What'\''s the weather like in Boston today?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "tool_choice": "auto", + "stream": true +}' +``` + + + ## Using ollama `api/chat` In order to send ollama requests to `POST /api/chat` on your ollama server, set the model prefix to `ollama_chat` diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d4985bffd..2689d0566 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3956,6 +3956,16 @@ "litellm_provider": "ollama", "mode": "chat" }, + "ollama/llama3.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, "ollama/mistral": { "max_tokens": 8192, "max_input_tokens": 8192, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 240c3d436..34bf7d89a 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,5 +1,5 @@ model_list: - - model_name: "mistral" + - model_name: "llama3.1" litellm_params: model: "ollama_chat/llama3.1" model_info: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index d4985bffd..2689d0566 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3956,6 +3956,16 @@ "litellm_provider": "ollama", "mode": "chat" }, + "ollama/llama3.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, "ollama/mistral": { "max_tokens": 8192, "max_input_tokens": 8192, From 
48a1a2ba26edeb16cc36c5b475f4d67eb5629018 Mon Sep 17 00:00:00 2001 From: yujonglee Date: Sat, 27 Jul 2024 15:05:17 +0900 Subject: [PATCH 100/655] fix --- docs/my-website/package.json | 4 ++-- docs/my-website/yarn.lock | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/my-website/package.json b/docs/my-website/package.json index 6fc4ea23c..0cf6e2729 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -18,8 +18,8 @@ "@docusaurus/plugin-google-gtag": "^2.4.1", "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", - "@getcanary/docusaurus-pagefind": "^0.0.11", - "@getcanary/web": "^0.0.54", + "@getcanary/docusaurus-pagefind": "^0.0.12", + "@getcanary/web": "^0.0.55", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index 7e0b432a5..650e7808e 100644 --- a/docs/my-website/yarn.lock +++ b/docs/my-website/yarn.lock @@ -1961,19 +1961,19 @@ resolved "https://registry.yarnpkg.com/@floating-ui/utils/-/utils-0.2.5.tgz#105c37d9d9620ce69b7f692a20c821bf1ad2cbf9" integrity sha512-sTcG+QZ6fdEUObICavU+aB3Mp8HY4n14wYHdxK4fXjPmv3PXZZeY5RaguJmGyeH/CJQhX3fqKUtS4qc1LoHwhQ== -"@getcanary/docusaurus-pagefind@^0.0.11": - version "0.0.11" - resolved "https://registry.yarnpkg.com/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.11.tgz#c4938b4f3d0f99c4d46d9b11a8800934f2bd7009" - integrity sha512-CN6nI8I5mdvE4Lt0+T95HNQdH8x6P4b2/T2YWbtjP0EB4TZl78lpuWXB3RZwiY7cY+C+aRL4Jo52SAqrBW2eqQ== +"@getcanary/docusaurus-pagefind@^0.0.12": + version "0.0.12" + resolved "https://registry.yarnpkg.com/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.12.tgz#c843ad66b3703f58a3d27fc0380922406fe03ee0" + integrity sha512-F0OQ0Lb/GltewDEr0w+BgPbNyYpzAQZ/TtuG5rbtC3PnrOL+9pDMe/Gs0kE8AuY1uEd/YQOKr61rbY/k7kkFig== dependencies: cli-progress "^3.12.0" micromatch "^4.0.7" pagefind "^1.1.0" -"@getcanary/web@^0.0.54": - version "0.0.54" - resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.54.tgz#a80d3a93b79beae3216e28391f35da32cac011c1" - integrity sha512-6ghmuusVq7pWNMj3SonRJ9Ncn0Yz2GxdT0pb7LLUJRdQWyxeP5UmnrhQ3jpq4NKzSqaIb8nK4M61Wikfbyr24Q== +"@getcanary/web@^0.0.55": + version "0.0.55" + resolved "https://registry.yarnpkg.com/@getcanary/web/-/web-0.0.55.tgz#8df5de51e3fd89d6334b9d51a37c61dc8136137e" + integrity sha512-DjIhTMeuLZaHT+/h+O6Keg9Gb58frPURpM4lkKrN/wmRMoCnOuly3oXIH2X37YhAoHXi4udDRJ60mtD0UZy0uw== dependencies: "@floating-ui/dom" "^1.6.8" "@lit-labs/observers" "^2.0.2" From c2c5877afa60810715ce0d1bb1a9a0d0c8d4685c Mon Sep 17 00:00:00 2001 From: Idris Mokhtarzada Date: Sat, 27 Jul 2024 02:16:36 -0400 Subject: [PATCH 101/655] Fix Datadog JSON serialization Nested dicts were not being serialized properly --- litellm/integrations/datadog.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index 97ad96a5d..f3170e446 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -1,5 +1,5 @@ #### What this does #### -# On success + failure, log events to Supabase +# On success + failure, log events to Datadog import dotenv, os import requests # type: ignore @@ -15,12 +15,13 @@ def make_json_serializable(payload): if isinstance(value, dict): # recursively sanitize dicts payload[key] = make_json_serializable(value.copy()) - if not isinstance(value, (str, int, float, bool, type(None))): + elif not isinstance(value, (str, int, float, 
bool, type(None))): # everything else becomes a string payload[key] = str(value) except: # non blocking if it can't cast to a str pass + return payload class DataDogLogger: From 0356decdec8cac2c3485a2a3b25a78d0ce4294df Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 09:16:08 -0700 Subject: [PATCH 102/655] =?UTF-8?q?bump:=20version=201.42.4=20=E2=86=92=20?= =?UTF-8?q?1.42.5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6e7f43608..229082b43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.4" +version = "1.42.5" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.4" +version = "1.42.5" version_files = [ "pyproject.toml:^version" ] From 1e621f716f70802714f79a052e670b649dbcecce Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 09:28:53 -0700 Subject: [PATCH 103/655] docs(debugging.md): cleanup docs --- docs/my-website/docs/proxy/debugging.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/my-website/docs/proxy/debugging.md b/docs/my-website/docs/proxy/debugging.md index 38680982a..5cca65417 100644 --- a/docs/my-website/docs/proxy/debugging.md +++ b/docs/my-website/docs/proxy/debugging.md @@ -35,6 +35,22 @@ $ litellm --detailed_debug os.environ["LITELLM_LOG"] = "DEBUG" ``` +### Debug Logs + +Run the proxy with `--detailed_debug` to view detailed debug logs +```shell +litellm --config /path/to/config.yaml --detailed_debug +``` + +When making requests you should see the POST request sent by LiteLLM to the LLM on the Terminal output +```shell +POST Request Sent from LiteLLM: +curl -X POST \ +https://api.openai.com/v1/chat/completions \ +-H 'content-type: application/json' -H 'Authorization: Bearer sk-qnWGUIW9****************************************' \ +-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "this is a test request, write a short poem"}]}' +``` + ## JSON LOGS Set `JSON_LOGS="True"` in your env: From 6f428a16fa503a4d16bcaea61862056493aca2b1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 09:45:58 -0700 Subject: [PATCH 104/655] fix update public key --- litellm/proxy/auth/public_key.pem | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/litellm/proxy/auth/public_key.pem b/litellm/proxy/auth/public_key.pem index 12a69dde2..0962794ac 100644 --- a/litellm/proxy/auth/public_key.pem +++ b/litellm/proxy/auth/public_key.pem @@ -1,9 +1,9 @@ ------BEGIN PUBLIC KEY----- -MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmfBuNiNzDkNWyce23koQ -w0vq3bSVHkq7fd9Sw/U1q7FwRwL221daLTyGWssd8xAoQSFXAJKoBwzJQ9wd+o44 -lfL54E3a61nfjZuF+D9ntpXZFfEAxLVtIahDeQjUz4b/EpgciWIJyUfjCJrQo6LY -eyAZPTGSO8V3zHyaU+CFywq5XCuCnfZqCZeCw051St59A2v8W32mXSCJ+A+x0hYP -yXJyRRFcefSFG5IBuRHr4Y24Vx7NUIAoco5cnxJho9g2z3J/Hb0GKW+oBNvRVumk -nuA2Ljmjh4yI0OoTIW8ZWxemvCCJHSjdfKlMyb+QI4fmeiIUZzP5Au+F561Styqq -YQIDAQAB ------END PUBLIC KEY----- + -----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwcNBabWBZzrDhFAuA4Fh +FhIcA3rF7vrLb8+1yhF2U62AghQp9nStyuJRjxMUuldWgJ1yRJ2s7UffVw5r8DeA +dqXPD+w+3LCNwqJGaIKN08QGJXNArM3QtMaN0RTzAyQ4iibN1r6609W5muK9wGp0 
+b1j5+iDUmf0ynItnhvaX6B8Xoaflc3WD/UBdrygLmsU5uR3XC86+/8ILoSZH3HtN +6FJmWhlhjS2TR1cKZv8K5D0WuADTFf5MF8jYFR+uORPj5Pe/EJlLGN26Lfn2QnGu +XgbPF6nCGwZ0hwH1Xkn3xzGaJ4xBEC761wqp5cHxWSDktHyFKnLbP3jVeegjVIHh +pQIDAQAB +-----END PUBLIC KEY----- \ No newline at end of file From 1adf71b9b79e77f313fbdab283c0ee09d11b407f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 09:50:03 -0700 Subject: [PATCH 105/655] feat - clearly show version litellm enterprise --- litellm/proxy/proxy_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 7c7c7eea9..274e28ac2 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -289,7 +289,7 @@ _title = os.getenv("DOCS_TITLE", "LiteLLM API") if premium_user else "LiteLLM AP _description = ( os.getenv( "DOCS_DESCRIPTION", - f"Proxy Server to call 100+ LLMs in the OpenAI format. {custom_swagger_message}\n\n{ui_message}", + f"Enterprise Edition \n\nProxy Server to call 100+ LLMs in the OpenAI format. {custom_swagger_message}\n\n{ui_message}", ) if premium_user else f"Proxy Server to call 100+ LLMs in the OpenAI format. {custom_swagger_message}\n\n{ui_message}" From 2719860c46d2d0d4ec07bc4ce601a7a62c68fc24 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 10:32:38 -0700 Subject: [PATCH 106/655] build(model_prices_and_context_window.json): add mistral-large on vertex ai pricing --- ...odel_prices_and_context_window_backup.json | 22 +++++++++++++++---- litellm/proxy/_new_secret_config.yaml | 7 +++--- litellm/proxy/auth/handle_jwt.py | 2 +- model_prices_and_context_window.json | 22 +++++++++++++++---- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 2689d0566..7f773040e 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2028,6 +2028,16 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, + "vertex_ai/mistral-large@2407": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/imagegeneration@006": { "cost_per_image": 0.020, "litellm_provider": "vertex_ai-image-models", @@ -2994,7 +3004,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "mistral.mistral-large-2407-v1:0": { "max_tokens": 8191, @@ -3003,7 +3014,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000009, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { "max_tokens": 8191, @@ -3075,7 +3087,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { "max_tokens": 8191, @@ -3084,7 +3097,8 @@ "input_cost_per_token": 0.0000104, "output_cost_per_token": 0.0000312, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, 
"anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 34bf7d89a..b712afaf0 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,6 +1,5 @@ model_list: - - model_name: "llama3.1" + - model_name: "*" litellm_params: - model: "ollama_chat/llama3.1" - model_info: - supports_function_calling: true \ No newline at end of file + model: "*" + diff --git a/litellm/proxy/auth/handle_jwt.py b/litellm/proxy/auth/handle_jwt.py index 200df7317..f8618781f 100644 --- a/litellm/proxy/auth/handle_jwt.py +++ b/litellm/proxy/auth/handle_jwt.py @@ -10,7 +10,6 @@ import json import os from typing import Optional -import jwt from cryptography import x509 from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization @@ -195,6 +194,7 @@ class JWTHandler: if audience is None: decode_options = {"verify_aud": False} + import jwt from jwt.algorithms import RSAAlgorithm header = jwt.get_unverified_header(token) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 2689d0566..7f773040e 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2028,6 +2028,16 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, + "vertex_ai/mistral-large@2407": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/imagegeneration@006": { "cost_per_image": 0.020, "litellm_provider": "vertex_ai-image-models", @@ -2994,7 +3004,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "mistral.mistral-large-2407-v1:0": { "max_tokens": 8191, @@ -3003,7 +3014,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000009, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { "max_tokens": 8191, @@ -3075,7 +3087,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { "max_tokens": 8191, @@ -3084,7 +3097,8 @@ "input_cost_per_token": 0.0000104, "output_cost_per_token": 0.0000312, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, From 1a8f45e8da8ac22ca9e89dd724d9477f961dc66c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 10:46:52 -0700 Subject: [PATCH 107/655] build(model_prices_and_context_window.json): add mistral nemo + codestral pricing --- ...odel_prices_and_context_window_backup.json | 20 +++++++++++++++++++ model_prices_and_context_window.json | 20 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 7f773040e..0f20f6689 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ 
b/litellm/model_prices_and_context_window_backup.json @@ -2038,6 +2038,26 @@ "mode": "chat", "supports_function_calling": true }, + "vertex_ai/mistral-nemo@2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/codestral@2405": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/imagegeneration@006": { "cost_per_image": 0.020, "litellm_provider": "vertex_ai-image-models", diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7f773040e..0f20f6689 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2038,6 +2038,26 @@ "mode": "chat", "supports_function_calling": true }, + "vertex_ai/mistral-nemo@2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/codestral@2405": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/imagegeneration@006": { "cost_per_image": 0.020, "litellm_provider": "vertex_ai-image-models", From 61c10e60a4369d1f4fce4f4e2c404990ebb475b4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:08:22 -0700 Subject: [PATCH 108/655] feat - use log_to_opentelemetry for _PROXY_track_cost_callback --- litellm/proxy/proxy_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 274e28ac2..019839d78 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -203,6 +203,7 @@ from litellm.proxy.utils import ( get_error_message_str, get_instance_fn, hash_token, + log_to_opentelemetry, reset_budget, send_email, update_spend, @@ -649,6 +650,7 @@ async def _PROXY_failure_handler( pass +@log_to_opentelemetry async def _PROXY_track_cost_callback( kwargs, # kwargs to completion completion_response: litellm.ModelResponse, # response from completion From 2a89486948e20baf8a21bc71feda21b938a232f2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:12:13 -0700 Subject: [PATCH 109/655] move _get_parent_otel_span_from_kwargs to otel.py --- litellm/caching.py | 11 +---------- litellm/integrations/opentelemetry.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/litellm/caching.py b/litellm/caching.py index 67c15253c..3f14b8468 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -21,6 +21,7 @@ from openai._models import BaseModel as OpenAIObject import litellm from litellm._logging import verbose_logger +from litellm.integrations.opentelemetry import _get_parent_otel_span_from_kwargs from litellm.types.services import ServiceLoggerPayload, ServiceTypes @@ -33,16 +34,6 @@ def print_verbose(print_statement): pass -def 
_get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None): - try: - if kwargs is None: - return None - _metadata = kwargs.get("metadata") or {} - return _metadata.get("litellm_parent_otel_span") - except: - return None - - class BaseCache: def set_cache(self, key, value, **kwargs): raise NotImplementedError diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index ef14ad679..e44bc6bee 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -703,3 +703,24 @@ class OpenTelemetry(CustomLogger): management_endpoint_span.set_attribute(f"exception", str(_exception)) management_endpoint_span.set_status(Status(StatusCode.ERROR)) management_endpoint_span.end(end_time=_end_time_ns) + + +# Helper functions used for OTEL logging +def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None): + try: + if kwargs is None: + return None + litellm_params = kwargs.get("litellm_params") + _metadata = kwargs.get("metadata") or {} + if "litellm_parent_otel_span" in _metadata: + return _metadata["litellm_parent_otel_span"] + elif ( + litellm_params is not None + and litellm_params.get("metadata") is not None + and "litellm_parent_otel_span" in litellm_params.get("metadata", {}) + ): + return litellm_params["metadata"]["litellm_parent_otel_span"] + elif "litellm_parent_otel_span" in kwargs: + return kwargs["litellm_parent_otel_span"] + except: + return None From d5d9ed73af70a7e813cfd65b3def50a049ae4e55 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:14:06 -0700 Subject: [PATCH 110/655] use _get_parent_otel_span_from_kwargs --- litellm/proxy/proxy_config.yaml | 8 +------- litellm/proxy/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 4df510399..c0045e40c 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -27,12 +27,6 @@ model_list: mode: audio_speech general_settings: master_key: sk-1234 - alerting: ["slack"] - alerting_threshold: 0.0001 - alert_to_webhook_url: { - "llm_too_slow": "https://hooks.slack.com/services/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", - "llm_requests_hanging": "https://hooks.slack.com/services/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", - } litellm_settings: - success_callback: ["langfuse"] \ No newline at end of file + callbacks: ["otel"] \ No newline at end of file diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index fc47abf9c..682d85539 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -31,6 +31,7 @@ from litellm._service_logger import ServiceLogging, ServiceTypes from litellm.caching import DualCache, RedisCache from litellm.exceptions import RejectedRequestError from litellm.integrations.custom_logger import CustomLogger +from litellm.integrations.opentelemetry import _get_parent_otel_span_from_kwargs from litellm.integrations.slack_alerting import SlackAlerting from litellm.litellm_core_utils.litellm_logging import Logging from litellm.llms.custom_httpx.httpx_handler import HTTPHandler @@ -125,6 +126,27 @@ def log_to_opentelemetry(func): start_time=start_time, end_time=end_time, ) + elif ( + # in litellm custom callbacks kwargs is passed as arg[0] + # https://docs.litellm.ai/docs/observability/custom_callback#callback-functions + args is not None + and len(args) > 0 + ): + passed_kwargs = args[0] + parent_otel_span = _get_parent_otel_span_from_kwargs( + 
kwargs=passed_kwargs + ) + if parent_otel_span is not None: + from litellm.proxy.proxy_server import proxy_logging_obj + + await proxy_logging_obj.service_logging_obj.async_service_success_hook( + service=ServiceTypes.DB, + call_type=func.__name__, + parent_otel_span=parent_otel_span, + duration=0.0, + start_time=start_time, + end_time=end_time, + ) # end of logging to otel return result except Exception as e: From 32eb3bd719bff274d867225ad60317e584fee188 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:36:51 -0700 Subject: [PATCH 111/655] add new BATCH_WRITE_TO_DB type for service logger --- litellm/types/services.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/litellm/types/services.py b/litellm/types/services.py index 9c3c2120e..62046ff44 100644 --- a/litellm/types/services.py +++ b/litellm/types/services.py @@ -1,7 +1,9 @@ -import uuid, enum -from pydantic import BaseModel, Field +import enum +import uuid from typing import Optional +from pydantic import BaseModel, Field + class ServiceTypes(str, enum.Enum): """ @@ -10,6 +12,7 @@ class ServiceTypes(str, enum.Enum): REDIS = "redis" DB = "postgres" + BATCH_WRITE_TO_DB = "batch_write_to_db" LITELLM = "self" From f71ba63cab2c1a6a79d1058133853ca66bebd8ca Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:39:03 -0700 Subject: [PATCH 112/655] refactor use common helper --- litellm/litellm_core_utils/core_helpers.py | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py index d6d6495d0..8148147ef 100644 --- a/litellm/litellm_core_utils/core_helpers.py +++ b/litellm/litellm_core_utils/core_helpers.py @@ -1,5 +1,6 @@ # What is this? ## Helper utilities +from typing import List, Literal, Optional, Tuple def map_finish_reason( @@ -54,3 +55,31 @@ def remove_index_from_tool_calls(messages, tool_calls): tool_call.pop("index") return + + +def get_litellm_metadata_from_kwargs(kwargs: dict): + """ + Helper to get litellm metadata from all litellm request kwargs + """ + return kwargs.get("litellm_params", {}).get("metadata", {}) + + +# Helper functions used for OTEL logging +def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None): + try: + if kwargs is None: + return None + litellm_params = kwargs.get("litellm_params") + _metadata = kwargs.get("metadata") or {} + if "litellm_parent_otel_span" in _metadata: + return _metadata["litellm_parent_otel_span"] + elif ( + litellm_params is not None + and litellm_params.get("metadata") is not None + and "litellm_parent_otel_span" in litellm_params.get("metadata", {}) + ): + return litellm_params["metadata"]["litellm_parent_otel_span"] + elif "litellm_parent_otel_span" in kwargs: + return kwargs["litellm_parent_otel_span"] + except: + return None From 19fb5cc11c46e892829b3a6ed303af8aec2ff14f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:40:39 -0700 Subject: [PATCH 113/655] use common helpers for writing to otel --- litellm/_service_logger.py | 2 ++ litellm/caching.py | 2 +- litellm/integrations/opentelemetry.py | 26 +++++--------------------- litellm/proxy/proxy_server.py | 18 ++++++++---------- litellm/proxy/utils.py | 9 +++++++-- 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index be8d7cf89..5d9ec7405 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -56,6 +56,7 @@ class ServiceLogging(CustomLogger): 
parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None, + event_metadata: Optional[dict] = None, ): """ - For counting if the redis, postgres call is successful @@ -84,6 +85,7 @@ class ServiceLogging(CustomLogger): parent_otel_span=parent_otel_span, start_time=start_time, end_time=end_time, + event_metadata=event_metadata, ) async def async_service_failure_hook( diff --git a/litellm/caching.py b/litellm/caching.py index 3f14b8468..557a029d3 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -21,7 +21,7 @@ from openai._models import BaseModel as OpenAIObject import litellm from litellm._logging import verbose_logger -from litellm.integrations.opentelemetry import _get_parent_otel_span_from_kwargs +from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.types.services import ServiceLoggerPayload, ServiceTypes diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index e44bc6bee..345e5152a 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -119,6 +119,7 @@ class OpenTelemetry(CustomLogger): parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None, + event_metadata: Optional[dict] = None, ): from datetime import datetime @@ -149,6 +150,10 @@ class OpenTelemetry(CustomLogger): service_logging_span.set_attribute( key="service", value=payload.service.value ) + + if event_metadata: + for key, value in event_metadata.items(): + service_logging_span.set_attribute(key, value) service_logging_span.set_status(Status(StatusCode.OK)) service_logging_span.end(end_time=_end_time_ns) @@ -703,24 +708,3 @@ class OpenTelemetry(CustomLogger): management_endpoint_span.set_attribute(f"exception", str(_exception)) management_endpoint_span.set_status(Status(StatusCode.ERROR)) management_endpoint_span.end(end_time=_end_time_ns) - - -# Helper functions used for OTEL logging -def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None): - try: - if kwargs is None: - return None - litellm_params = kwargs.get("litellm_params") - _metadata = kwargs.get("metadata") or {} - if "litellm_parent_otel_span" in _metadata: - return _metadata["litellm_parent_otel_span"] - elif ( - litellm_params is not None - and litellm_params.get("metadata") is not None - and "litellm_parent_otel_span" in litellm_params.get("metadata", {}) - ): - return litellm_params["metadata"]["litellm_parent_otel_span"] - elif "litellm_parent_otel_span" in kwargs: - return kwargs["litellm_parent_otel_span"] - except: - return None diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 019839d78..3a80e1960 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -108,6 +108,7 @@ from litellm._logging import verbose_proxy_logger, verbose_router_logger from litellm.caching import DualCache, RedisCache from litellm.exceptions import RejectedRequestError from litellm.integrations.slack_alerting import SlackAlerting, SlackAlertingArgs +from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.proxy._types import * from litellm.proxy.analytics_endpoints.analytics_endpoints import ( @@ -672,18 +673,15 @@ async def _PROXY_track_cost_callback( litellm_params = kwargs.get("litellm_params", {}) or {} 
proxy_server_request = litellm_params.get("proxy_server_request") or {} end_user_id = proxy_server_request.get("body", {}).get("user", None) - user_id = kwargs["litellm_params"]["metadata"].get("user_api_key_user_id", None) - team_id = kwargs["litellm_params"]["metadata"].get("user_api_key_team_id", None) - org_id = kwargs["litellm_params"]["metadata"].get("user_api_key_org_id", None) - key_alias = kwargs["litellm_params"]["metadata"].get("user_api_key_alias", None) - end_user_max_budget = kwargs["litellm_params"]["metadata"].get( - "user_api_end_user_max_budget", None - ) + metadata = get_litellm_metadata_from_kwargs(kwargs=kwargs) + user_id = metadata.get("user_api_key_user_id", None) + team_id = metadata.get("user_api_key_team_id", None) + org_id = metadata.get("user_api_key_org_id", None) + key_alias = metadata.get("user_api_key_alias", None) + end_user_max_budget = metadata.get("user_api_end_user_max_budget", None) if kwargs.get("response_cost", None) is not None: response_cost = kwargs["response_cost"] - user_api_key = kwargs["litellm_params"]["metadata"].get( - "user_api_key", None - ) + user_api_key = metadata.get("user_api_key", None) if kwargs.get("cache_hit", False) == True: response_cost = 0.0 diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 682d85539..923021efc 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -31,8 +31,11 @@ from litellm._service_logger import ServiceLogging, ServiceTypes from litellm.caching import DualCache, RedisCache from litellm.exceptions import RejectedRequestError from litellm.integrations.custom_logger import CustomLogger -from litellm.integrations.opentelemetry import _get_parent_otel_span_from_kwargs from litellm.integrations.slack_alerting import SlackAlerting +from litellm.litellm_core_utils.core_helpers import ( + _get_parent_otel_span_from_kwargs, + get_litellm_metadata_from_kwargs, +) from litellm.litellm_core_utils.litellm_logging import Logging from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.proxy._types import ( @@ -139,13 +142,15 @@ def log_to_opentelemetry(func): if parent_otel_span is not None: from litellm.proxy.proxy_server import proxy_logging_obj + metadata = get_litellm_metadata_from_kwargs(kwargs=passed_kwargs) await proxy_logging_obj.service_logging_obj.async_service_success_hook( - service=ServiceTypes.DB, + service=ServiceTypes.BATCH_WRITE_TO_DB, call_type=func.__name__, parent_otel_span=parent_otel_span, duration=0.0, start_time=start_time, end_time=end_time, + event_metadata=metadata, ) # end of logging to otel return result From 7c0ea16e3d6a5523f21aec90ec570cbad8414f50 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 11:47:17 -0700 Subject: [PATCH 114/655] test otel for batch_write_to_db --- tests/otel_tests/test_otel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/otel_tests/test_otel.py b/tests/otel_tests/test_otel.py index d3f070d5f..0db30b2f3 100644 --- a/tests/otel_tests/test_otel.py +++ b/tests/otel_tests/test_otel.py @@ -115,3 +115,4 @@ async def test_chat_completion_check_otel_spans(): assert "redis" in parent_trace_spans assert "raw_gen_ai_request" in parent_trace_spans assert "litellm_request" in parent_trace_spans + assert "batch_write_to_db" in parent_trace_spans From 7dac0e0001fccdd3b6b59ea842308992e6c65ade Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 12:05:24 -0700 Subject: [PATCH 115/655] fix otel logging --- tests/otel_tests/test_otel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff 
--git a/tests/otel_tests/test_otel.py b/tests/otel_tests/test_otel.py index 0db30b2f3..1c38e8125 100644 --- a/tests/otel_tests/test_otel.py +++ b/tests/otel_tests/test_otel.py @@ -5,6 +5,7 @@ import asyncio import aiohttp, openai from openai import OpenAI, AsyncOpenAI from typing import Optional, List, Union +import uuid async def generate_key( @@ -46,7 +47,7 @@ async def chat_completion(session, key, model: Union[str, List] = "gpt-4"): data = { "model": model, "messages": [ - {"role": "user", "content": "Hello!"}, + {"role": "user", "content": f"Hello! {str(uuid.uuid4())}"}, ], } From 5b71421a7b84fe5e12f8f5538dac9cba1fe8cc46 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 12:54:14 -0700 Subject: [PATCH 116/655] feat(vertex_ai_partner.py): initial working commit for calling vertex ai mistral Closes https://github.com/BerriAI/litellm/issues/4874 --- litellm/__init__.py | 6 +- litellm/llms/databricks.py | 300 ++++++++++++------ litellm/llms/openai.py | 2 +- ...ertex_ai_llama.py => vertex_ai_partner.py} | 39 ++- litellm/main.py | 8 +- ...odel_prices_and_context_window_backup.json | 10 + .../tests/test_amazing_vertex_completion.py | 65 +++- litellm/tests/test_optional_params.py | 15 + litellm/utils.py | 28 +- model_prices_and_context_window.json | 10 + 10 files changed, 343 insertions(+), 140 deletions(-) rename litellm/llms/{vertex_ai_llama.py => vertex_ai_partner.py} (79%) diff --git a/litellm/__init__.py b/litellm/__init__.py index b6aacad1a..97a0a05ea 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -358,6 +358,7 @@ vertex_code_text_models: List = [] vertex_embedding_models: List = [] vertex_anthropic_models: List = [] vertex_llama3_models: List = [] +vertex_mistral_models: List = [] ai21_models: List = [] nlp_cloud_models: List = [] aleph_alpha_models: List = [] @@ -403,6 +404,9 @@ for key, value in model_cost.items(): elif value.get("litellm_provider") == "vertex_ai-llama_models": key = key.replace("vertex_ai/", "") vertex_llama3_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-mistral_models": + key = key.replace("vertex_ai/", "") + vertex_mistral_models.append(key) elif value.get("litellm_provider") == "ai21": ai21_models.append(key) elif value.get("litellm_provider") == "nlp_cloud": @@ -833,7 +837,7 @@ from .llms.petals import PetalsConfig from .llms.vertex_httpx import VertexGeminiConfig, GoogleAIStudioGeminiConfig from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig -from .llms.vertex_ai_llama import VertexAILlama3Config +from .llms.vertex_ai_partner import VertexAILlama3Config from .llms.sagemaker import SagemakerConfig from .llms.ollama import OllamaConfig from .llms.ollama_chat import OllamaChatConfig diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 88fa58abe..a0ddba1f5 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -15,8 +15,14 @@ import requests # type: ignore import litellm from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.types.llms.databricks import GenericStreamingChunk -from litellm.types.utils import ProviderField +from litellm.types.llms.openai import ( + ChatCompletionDeltaChunk, + ChatCompletionResponseMessage, + ChatCompletionToolCallChunk, + ChatCompletionToolCallFunctionChunk, + ChatCompletionUsageBlock, +) +from litellm.types.utils import GenericStreamingChunk, 
ProviderField from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage from .base import BaseLLM @@ -114,71 +120,6 @@ class DatabricksConfig: optional_params["stop"] = value return optional_params - def _chunk_parser(self, chunk_data: str) -> GenericStreamingChunk: - try: - text = "" - is_finished = False - finish_reason = None - logprobs = None - usage = None - original_chunk = None # this is used for function/tool calling - chunk_data = chunk_data.replace("data:", "") - chunk_data = chunk_data.strip() - if len(chunk_data) == 0 or chunk_data == "[DONE]": - return { - "text": "", - "is_finished": is_finished, - "finish_reason": finish_reason, - } - chunk_data_dict = json.loads(chunk_data) - str_line = litellm.ModelResponse(**chunk_data_dict, stream=True) - - if len(str_line.choices) > 0: - if ( - str_line.choices[0].delta is not None # type: ignore - and str_line.choices[0].delta.content is not None # type: ignore - ): - text = str_line.choices[0].delta.content # type: ignore - else: # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai - original_chunk = str_line - if str_line.choices[0].finish_reason: - is_finished = True - finish_reason = str_line.choices[0].finish_reason - if finish_reason == "content_filter": - if hasattr(str_line.choices[0], "content_filter_result"): - error_message = json.dumps( - str_line.choices[0].content_filter_result # type: ignore - ) - else: - error_message = "Azure Response={}".format( - str(dict(str_line)) - ) - raise litellm.AzureOpenAIError( - status_code=400, message=error_message - ) - - # checking for logprobs - if ( - hasattr(str_line.choices[0], "logprobs") - and str_line.choices[0].logprobs is not None - ): - logprobs = str_line.choices[0].logprobs - else: - logprobs = None - - usage = getattr(str_line, "usage", None) - - return GenericStreamingChunk( - text=text, - is_finished=is_finished, - finish_reason=finish_reason, - logprobs=logprobs, - original_chunk=original_chunk, - usage=usage, - ) - except Exception as e: - raise e - class DatabricksEmbeddingConfig: """ @@ -236,7 +177,9 @@ async def make_call( if response.status_code != 200: raise DatabricksError(status_code=response.status_code, message=response.text) - completion_stream = response.aiter_lines() + completion_stream = ModelResponseIterator( + streaming_response=response.aiter_lines(), sync_stream=False + ) # LOGGING logging_obj.post_call( input=messages, @@ -248,6 +191,38 @@ async def make_call( return completion_stream +def make_sync_call( + client: Optional[HTTPHandler], + api_base: str, + headers: dict, + data: str, + model: str, + messages: list, + logging_obj, +): + if client is None: + client = HTTPHandler() # Create a new client if none provided + + response = client.post(api_base, headers=headers, data=data, stream=True) + + if response.status_code != 200: + raise DatabricksError(status_code=response.status_code, message=response.read()) + + completion_stream = ModelResponseIterator( + streaming_response=response.iter_lines(), sync_stream=True + ) + + # LOGGING + logging_obj.post_call( + input=messages, + api_key="", + original_response="first stream response received", + additional_args={"complete_input_dict": data}, + ) + + return completion_stream + + class DatabricksChatCompletion(BaseLLM): def __init__(self) -> None: super().__init__() @@ -259,6 +234,7 @@ class DatabricksChatCompletion(BaseLLM): api_key: Optional[str], api_base: Optional[str], endpoint_type: Literal["chat_completions", 
"embeddings"], + custom_endpoint: Optional[bool], ) -> Tuple[str, dict]: if api_key is None: raise DatabricksError( @@ -277,9 +253,9 @@ class DatabricksChatCompletion(BaseLLM): "Content-Type": "application/json", } - if endpoint_type == "chat_completions": + if endpoint_type == "chat_completions" and custom_endpoint is not True: api_base = "{}/chat/completions".format(api_base) - elif endpoint_type == "embeddings": + elif endpoint_type == "embeddings" and custom_endpoint is not True: api_base = "{}/embeddings".format(api_base) return api_base, headers @@ -464,8 +440,12 @@ class DatabricksChatCompletion(BaseLLM): timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): + custom_endpoint: Optional[bool] = optional_params.pop("custom_endpoint", None) api_base, headers = self._validate_environment( - api_base=api_base, api_key=api_key, endpoint_type="chat_completions" + api_base=api_base, + api_key=api_key, + endpoint_type="chat_completions", + custom_endpoint=custom_endpoint, ) ## Load Config config = litellm.DatabricksConfig().get_config() @@ -475,7 +455,8 @@ class DatabricksChatCompletion(BaseLLM): ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in optional_params[k] = v - stream = optional_params.pop("stream", None) + stream: bool = optional_params.pop("stream", None) or False + optional_params["stream"] = stream data = { "model": model, @@ -539,41 +520,26 @@ class DatabricksChatCompletion(BaseLLM): timeout=timeout, ) else: - if client is None or isinstance(client, AsyncHTTPHandler): - self.client = HTTPHandler(timeout=timeout) # type: ignore - else: - self.client = client + if client is None or not isinstance(client, HTTPHandler): + client = HTTPHandler(timeout=timeout) # type: ignore ## COMPLETION CALL - if ( - stream is not None and stream == True - ): # if function call - fake the streaming (need complete blocks for output parsing in openai format) - print_verbose("makes dbrx streaming POST request") - data["stream"] = stream - try: - response = self.client.post( - api_base, headers=headers, data=json.dumps(data), stream=stream - ) - response.raise_for_status() - completion_stream = response.iter_lines() - except httpx.HTTPStatusError as e: - raise DatabricksError( - status_code=e.response.status_code, message=response.text - ) - except httpx.TimeoutException as e: - raise DatabricksError( - status_code=408, message="Timeout error occurred." 
- ) - except Exception as e: - raise DatabricksError(status_code=408, message=str(e)) - - streaming_response = CustomStreamWrapper( - completion_stream=completion_stream, + if stream is True: + return CustomStreamWrapper( + completion_stream=None, + make_call=partial( + make_sync_call, + client=None, + api_base=api_base, + headers=headers, # type: ignore + data=json.dumps(data), + model=model, + messages=messages, + logging_obj=logging_obj, + ), model=model, - custom_llm_provider="databricks", + custom_llm_provider="vertex_ai_beta", logging_obj=logging_obj, ) - return streaming_response - else: try: response = self.client.post( @@ -667,7 +633,10 @@ class DatabricksChatCompletion(BaseLLM): aembedding=None, ) -> EmbeddingResponse: api_base, headers = self._validate_environment( - api_base=api_base, api_key=api_key, endpoint_type="embeddings" + api_base=api_base, + api_key=api_key, + endpoint_type="embeddings", + custom_endpoint=False, ) model = model data = {"model": model, "input": input, **optional_params} @@ -716,3 +685,126 @@ class DatabricksChatCompletion(BaseLLM): ) return litellm.EmbeddingResponse(**response_json) + + +class ModelResponseIterator: + def __init__(self, streaming_response, sync_stream: bool): + self.streaming_response = streaming_response + + def chunk_parser(self, chunk: dict) -> GenericStreamingChunk: + try: + processed_chunk = litellm.ModelResponse(**chunk, stream=True) # type: ignore + + text = "" + tool_use: Optional[ChatCompletionToolCallChunk] = None + is_finished = False + finish_reason = "" + usage: Optional[ChatCompletionUsageBlock] = None + + if processed_chunk.choices[0].delta.content is not None: # type: ignore + text = processed_chunk.choices[0].delta.content # type: ignore + + if ( + processed_chunk.choices[0].delta.tool_calls is not None # type: ignore + and len(processed_chunk.choices[0].delta.tool_calls) > 0 # type: ignore + and processed_chunk.choices[0].delta.tool_calls[0].function is not None # type: ignore + and processed_chunk.choices[0].delta.tool_calls[0].function.arguments # type: ignore + is not None + ): + tool_use = ChatCompletionToolCallChunk( + id=processed_chunk.choices[0].delta.tool_calls[0].id, # type: ignore + type="function", + function=ChatCompletionToolCallFunctionChunk( + name=processed_chunk.choices[0] + .delta.tool_calls[0] # type: ignore + .function.name, + arguments=processed_chunk.choices[0] + .delta.tool_calls[0] # type: ignore + .function.arguments, + ), + index=processed_chunk.choices[0].index, + ) + + if processed_chunk.choices[0].finish_reason is not None: + is_finished = True + finish_reason = processed_chunk.choices[0].finish_reason + + if hasattr(processed_chunk, "usage"): + usage = processed_chunk.usage # type: ignore + + return GenericStreamingChunk( + text=text, + tool_use=tool_use, + is_finished=is_finished, + finish_reason=finish_reason, + usage=usage, + index=0, + ) + except json.JSONDecodeError: + raise ValueError(f"Failed to decode JSON from chunk: {chunk}") + + # Sync iterator + def __iter__(self): + self.response_iterator = self.streaming_response + return self + + def __next__(self): + try: + chunk = self.response_iterator.__next__() + except StopIteration: + raise StopIteration + except ValueError as e: + raise RuntimeError(f"Error receiving chunk from stream: {e}") + + try: + chunk = chunk.replace("data:", "") + chunk = chunk.strip() + if len(chunk) > 0: + json_chunk = json.loads(chunk) + return self.chunk_parser(chunk=json_chunk) + else: + return GenericStreamingChunk( + text="", + is_finished=False, + 
finish_reason="", + usage=None, + index=0, + tool_use=None, + ) + except StopIteration: + raise StopIteration + except ValueError as e: + raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}") + + # Async iterator + def __aiter__(self): + self.async_response_iterator = self.streaming_response.__aiter__() + return self + + async def __anext__(self): + try: + chunk = await self.async_response_iterator.__anext__() + except StopAsyncIteration: + raise StopAsyncIteration + except ValueError as e: + raise RuntimeError(f"Error receiving chunk from stream: {e}") + + try: + chunk = chunk.replace("data:", "") + chunk = chunk.strip() + if len(chunk) > 0: + json_chunk = json.loads(chunk) + return self.chunk_parser(chunk=json_chunk) + else: + return GenericStreamingChunk( + text="", + is_finished=False, + finish_reason="", + usage=None, + index=0, + tool_use=None, + ) + except StopAsyncIteration: + raise StopAsyncIteration + except ValueError as e: + raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}") diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 94000233c..afd49ab14 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -160,7 +160,7 @@ class MistralConfig: optional_params["max_tokens"] = value if param == "tools": optional_params["tools"] = value - if param == "stream" and value == True: + if param == "stream" and value is True: optional_params["stream"] = value if param == "temperature": optional_params["temperature"] = value diff --git a/litellm/llms/vertex_ai_llama.py b/litellm/llms/vertex_ai_partner.py similarity index 79% rename from litellm/llms/vertex_ai_llama.py rename to litellm/llms/vertex_ai_partner.py index cc4786c4b..66f8a1740 100644 --- a/litellm/llms/vertex_ai_llama.py +++ b/litellm/llms/vertex_ai_partner.py @@ -7,7 +7,7 @@ import time import types import uuid from enum import Enum -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Literal, Optional, Tuple, Union import httpx # type: ignore import requests # type: ignore @@ -108,14 +108,25 @@ class VertexAILlama3Config: return optional_params -class VertexAILlama3(BaseLLM): +class VertexAIPartnerModels(BaseLLM): def __init__(self) -> None: pass - def create_vertex_llama3_url( - self, vertex_location: str, vertex_project: str + def create_vertex_url( + self, + vertex_location: str, + vertex_project: str, + partner: Literal["llama", "mistralai"], + stream: Optional[bool], + model: str, ) -> str: - return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi" + if partner == "llama": + return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi" + elif partner == "mistralai": + if stream: + return f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:streamRawPredict" + else: + return f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:rawPredict" def completion( self, @@ -141,6 +152,7 @@ class VertexAILlama3(BaseLLM): import vertexai from google.cloud import aiplatform + from litellm.llms.databricks import DatabricksChatCompletion from litellm.llms.openai import OpenAIChatCompletion from litellm.llms.vertex_httpx import VertexLLM except Exception: @@ -166,6 +178,7 @@ 
class VertexAILlama3(BaseLLM): ) openai_chat_completions = OpenAIChatCompletion() + openai_like_chat_completions = DatabricksChatCompletion() ## Load Config # config = litellm.VertexAILlama3.get_config() @@ -178,12 +191,23 @@ class VertexAILlama3(BaseLLM): optional_params["stream"] = stream - api_base = self.create_vertex_llama3_url( + if "llama" in model: + partner = "llama" + elif "mistral" in model: + partner = "mistralai" + optional_params["custom_endpoint"] = True + + api_base = self.create_vertex_url( vertex_location=vertex_location or "us-central1", vertex_project=vertex_project or project_id, + partner=partner, # type: ignore + stream=stream, + model=model, ) - return openai_chat_completions.completion( + model = model.split("@")[0] + + return openai_like_chat_completions.completion( model=model, messages=messages, api_base=api_base, @@ -198,6 +222,7 @@ class VertexAILlama3(BaseLLM): logger_fn=logger_fn, client=client, timeout=timeout, + encoding=encoding, ) except Exception as e: diff --git a/litellm/main.py b/litellm/main.py index 672029f69..c88119df9 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -121,7 +121,7 @@ from .llms.prompt_templates.factory import ( ) from .llms.text_completion_codestral import CodestralTextCompletion from .llms.triton import TritonChatCompletion -from .llms.vertex_ai_llama import VertexAILlama3 +from .llms.vertex_ai_partner import VertexAIPartnerModels from .llms.vertex_httpx import VertexLLM from .llms.watsonx import IBMWatsonXAI from .types.llms.openai import HttpxBinaryResponseContent @@ -158,7 +158,7 @@ triton_chat_completions = TritonChatCompletion() bedrock_chat_completion = BedrockLLM() bedrock_converse_chat_completion = BedrockConverseLLM() vertex_chat_completion = VertexLLM() -vertex_llama_chat_completion = VertexAILlama3() +vertex_partner_models_chat_completion = VertexAIPartnerModels() watsonxai = IBMWatsonXAI() ####### COMPLETION ENDPOINTS ################ @@ -2068,8 +2068,8 @@ def completion( timeout=timeout, client=client, ) - elif model.startswith("meta/"): - model_response = vertex_llama_chat_completion.completion( + elif model.startswith("meta/") or model.startswith("mistral"): + model_response = vertex_partner_models_chat_completion.completion( model=model, messages=messages, model_response=model_response, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 0f20f6689..703e6c82c 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2028,6 +2028,16 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, + "vertex_ai/mistral-large@latest": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/mistral-large@2407": { "max_tokens": 8191, "max_input_tokens": 128000, diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index bebe5d031..5419c25ff 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -899,16 +899,18 @@ from litellm.tests.test_completion import response_format_tests @pytest.mark.parametrize( - "model", ["vertex_ai/meta/llama3-405b-instruct-maas"] + "model", + [ + 
"vertex_ai/mistral-large@2407", + "vertex_ai/meta/llama3-405b-instruct-maas", + ], # ) # "vertex_ai", @pytest.mark.parametrize( "sync_mode", - [ - True, - ], -) # False + [True, False], +) # @pytest.mark.asyncio -async def test_llama_3_httpx(model, sync_mode): +async def test_partner_models_httpx(model, sync_mode): try: load_vertex_ai_credentials() litellm.set_verbose = True @@ -946,6 +948,57 @@ async def test_llama_3_httpx(model, sync_mode): pytest.fail("An unexpected exception occurred - {}".format(str(e))) +@pytest.mark.parametrize( + "model", + [ + "vertex_ai/mistral-large@2407", + "vertex_ai/meta/llama3-405b-instruct-maas", + ], # +) # "vertex_ai", +@pytest.mark.parametrize( + "sync_mode", + [True, False], # +) # +@pytest.mark.asyncio +async def test_partner_models_httpx_streaming(model, sync_mode): + try: + load_vertex_ai_credentials() + litellm.set_verbose = True + + messages = [ + { + "role": "system", + "content": "Your name is Litellm Bot, you are a helpful assistant", + }, + # User asks for their name and weather in San Francisco + { + "role": "user", + "content": "Hello, what is your name and can you tell me the weather?", + }, + ] + + data = {"model": model, "messages": messages, "stream": True} + if sync_mode: + response = litellm.completion(**data) + for idx, chunk in enumerate(response): + streaming_format_tests(idx=idx, chunk=chunk) + else: + response = await litellm.acompletion(**data) + idx = 0 + async for chunk in response: + streaming_format_tests(idx=idx, chunk=chunk) + idx += 1 + + print(f"response: {response}") + except litellm.RateLimitError: + pass + except Exception as e: + if "429 Quota exceeded" in str(e): + pass + else: + pytest.fail("An unexpected exception occurred - {}".format(str(e))) + + def vertex_httpx_mock_reject_prompt_post(*args, **kwargs): mock_response = MagicMock() mock_response.status_code = 200 diff --git a/litellm/tests/test_optional_params.py b/litellm/tests/test_optional_params.py index b8011960e..83ac855a8 100644 --- a/litellm/tests/test_optional_params.py +++ b/litellm/tests/test_optional_params.py @@ -141,6 +141,21 @@ def test_vertex_ai_llama_3_optional_params(): assert "user" not in optional_params +def test_vertex_ai_mistral_optional_params(): + litellm.vertex_mistral_models = ["mistral-large@2407"] + litellm.drop_params = True + optional_params = get_optional_params( + model="mistral-large@2407", + user="John", + custom_llm_provider="vertex_ai", + max_tokens=10, + temperature=0.2, + ) + assert "user" not in optional_params + assert "max_tokens" in optional_params + assert "temperature" in optional_params + + def test_azure_gpt_optional_params_gpt_vision(): # for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. 
We assert we actually set extra_body here optional_params = litellm.utils.get_optional_params( diff --git a/litellm/utils.py b/litellm/utils.py index 358904677..27b3f60c1 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3104,6 +3104,15 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, ) + elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models: + supported_params = get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + _check_valid_arg(supported_params=supported_params) + optional_params = litellm.MistralConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + ) elif custom_llm_provider == "sagemaker": ## check if unsupported param passed in supported_params = get_supported_openai_params( @@ -4210,7 +4219,8 @@ def get_supported_openai_params( if request_type == "chat_completion": if model.startswith("meta/"): return litellm.VertexAILlama3Config().get_supported_openai_params() - + if model.startswith("mistral"): + return litellm.MistralConfig().get_supported_openai_params() return litellm.VertexAIConfig().get_supported_openai_params() elif request_type == "embeddings": return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params() @@ -9631,22 +9641,6 @@ class CustomStreamWrapper: completion_tokens=response_obj["usage"].completion_tokens, total_tokens=response_obj["usage"].total_tokens, ) - elif self.custom_llm_provider == "databricks": - response_obj = litellm.DatabricksConfig()._chunk_parser(chunk) - completion_obj["content"] = response_obj["text"] - print_verbose(f"completion obj content: {completion_obj['content']}") - if response_obj["is_finished"]: - self.received_finish_reason = response_obj["finish_reason"] - if ( - self.stream_options - and self.stream_options.get("include_usage", False) == True - and response_obj["usage"] is not None - ): - model_response.usage = litellm.Usage( - prompt_tokens=response_obj["usage"].prompt_tokens, - completion_tokens=response_obj["usage"].completion_tokens, - total_tokens=response_obj["usage"].total_tokens, - ) elif self.custom_llm_provider == "azure_text": response_obj = self.handle_azure_text_completion_chunk(chunk) completion_obj["content"] = response_obj["text"] diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 0f20f6689..703e6c82c 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2028,6 +2028,16 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, + "vertex_ai/mistral-large@latest": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/mistral-large@2407": { "max_tokens": 8191, "max_input_tokens": 128000, From 05ba34b9b719bac16b3671bf4602fdb97dbc069e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 13:13:31 -0700 Subject: [PATCH 117/655] fix(utils.py): add exception mapping for databricks errors --- litellm/tests/test_exceptions.py | 4 +++- litellm/utils.py | 35 +++++++++++++++++--------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 66c8594bb..dfefe99d6 100644 --- 
a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -770,7 +770,9 @@ def test_litellm_predibase_exception(): # print(f"accuracy_score: {accuracy_score}") -@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta", "anthropic"]) +@pytest.mark.parametrize( + "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks"] +) def test_exception_mapping(provider): """ For predibase, run through a set of mock exceptions diff --git a/litellm/utils.py b/litellm/utils.py index 358904677..780148059 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6723,7 +6723,10 @@ def exception_type( model=model, response=original_exception.response, ) - elif custom_llm_provider == "predibase": + elif ( + custom_llm_provider == "predibase" + or custom_llm_provider == "databricks" + ): if "authorization denied for" in error_str: exception_mapping_worked = True @@ -6739,8 +6742,8 @@ def exception_type( error_str += "XXXXXXX" + '"' raise AuthenticationError( - message=f"PredibaseException: Authentication Error - {error_str}", - llm_provider="predibase", + message=f"{custom_llm_provider}Exception: Authentication Error - {error_str}", + llm_provider=custom_llm_provider, model=model, response=original_exception.response, litellm_debug_info=extra_information, @@ -6749,35 +6752,35 @@ def exception_type( if original_exception.status_code == 500: exception_mapping_worked = True raise litellm.InternalServerError( - message=f"PredibaseException - {original_exception.message}", - llm_provider="predibase", + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, model=model, ) elif original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( - message=f"PredibaseException - {original_exception.message}", - llm_provider="predibase", + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, model=model, ) elif original_exception.status_code == 400: exception_mapping_worked = True raise BadRequestError( - message=f"PredibaseException - {original_exception.message}", - llm_provider="predibase", + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, model=model, ) elif original_exception.status_code == 404: exception_mapping_worked = True raise NotFoundError( - message=f"PredibaseException - {original_exception.message}", - llm_provider="predibase", + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, model=model, ) elif original_exception.status_code == 408: exception_mapping_worked = True raise Timeout( - message=f"PredibaseException - {original_exception.message}", + message=f"{custom_llm_provider}Exception - {original_exception.message}", model=model, llm_provider=custom_llm_provider, litellm_debug_info=extra_information, @@ -6788,7 +6791,7 @@ def exception_type( ): exception_mapping_worked = True raise BadRequestError( - message=f"PredibaseException - {original_exception.message}", + message=f"{custom_llm_provider}Exception - {original_exception.message}", model=model, llm_provider=custom_llm_provider, litellm_debug_info=extra_information, @@ -6796,7 +6799,7 @@ def exception_type( elif original_exception.status_code == 429: exception_mapping_worked = True raise RateLimitError( - message=f"PredibaseException - {original_exception.message}", + message=f"{custom_llm_provider}Exception - {original_exception.message}", 
model=model, llm_provider=custom_llm_provider, litellm_debug_info=extra_information, @@ -6804,7 +6807,7 @@ def exception_type( elif original_exception.status_code == 503: exception_mapping_worked = True raise ServiceUnavailableError( - message=f"PredibaseException - {original_exception.message}", + message=f"{custom_llm_provider}Exception - {original_exception.message}", model=model, llm_provider=custom_llm_provider, litellm_debug_info=extra_information, @@ -6812,7 +6815,7 @@ def exception_type( elif original_exception.status_code == 504: # gateway timeout error exception_mapping_worked = True raise Timeout( - message=f"PredibaseException - {original_exception.message}", + message=f"{custom_llm_provider}Exception - {original_exception.message}", model=model, llm_provider=custom_llm_provider, litellm_debug_info=extra_information, From 3c77f3975144e1e31603d1ebb2926a715a14c05b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 13:15:00 -0700 Subject: [PATCH 118/655] fix(databricks.py): fix client used --- litellm/llms/databricks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index a0ddba1f5..18dd4ab65 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -542,7 +542,7 @@ class DatabricksChatCompletion(BaseLLM): ) else: try: - response = self.client.post( + response = client.post( api_base, headers=headers, data=json.dumps(data) ) response.raise_for_status() From 35ecf3c06b7591b7de61af3603df658130fa8476 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 15:30:58 -0700 Subject: [PATCH 119/655] fix otel test --- tests/otel_tests/test_otel.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/otel_tests/test_otel.py b/tests/otel_tests/test_otel.py index 1c38e8125..d0d312128 100644 --- a/tests/otel_tests/test_otel.py +++ b/tests/otel_tests/test_otel.py @@ -97,6 +97,8 @@ async def test_chat_completion_check_otel_spans(): key = key_gen["key"] await chat_completion(session=session, key=key, model="fake-openai-endpoint") + await asyncio.sleep(3) + otel_spans = await get_otel_spans(session=session, key=key) print("otel_spans: ", otel_spans) @@ -108,8 +110,8 @@ async def test_chat_completion_check_otel_spans(): print("Parent trace spans: ", parent_trace_spans) - # either 4 or 5 traces depending on how many redis calls were made - assert len(parent_trace_spans) == 5 or len(parent_trace_spans) == 4 + # either 5 or 6 traces depending on how many redis calls were made + assert len(parent_trace_spans) == 6 or len(parent_trace_spans) == 5 # 'postgres', 'redis', 'raw_gen_ai_request', 'litellm_request', 'Received Proxy Server Request' in the span assert "postgres" in parent_trace_spans From c85ed01756bb55fc7aaa8729dac8f453a0bd327f Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 15:32:57 -0700 Subject: [PATCH 120/655] feat(utils.py): fix openai-like streaming --- litellm/llms/databricks.py | 7 +++++-- litellm/llms/vertex_ai_partner.py | 2 +- litellm/main.py | 1 + litellm/utils.py | 17 +++++++++++++---- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 18dd4ab65..1a276f52c 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -344,6 +344,7 @@ class DatabricksChatCompletion(BaseLLM): self, model: str, messages: list, + custom_llm_provider: str, api_base: str, custom_prompt_dict: dict, model_response: ModelResponse, @@ -373,7 +374,7 @@ 
class DatabricksChatCompletion(BaseLLM): logging_obj=logging_obj, ), model=model, - custom_llm_provider="databricks", + custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, ) return streamwrapper @@ -426,6 +427,7 @@ class DatabricksChatCompletion(BaseLLM): model: str, messages: list, api_base: str, + custom_llm_provider: str, custom_prompt_dict: dict, model_response: ModelResponse, print_verbose: Callable, @@ -499,6 +501,7 @@ class DatabricksChatCompletion(BaseLLM): logger_fn=logger_fn, headers=headers, client=client, + custom_llm_provider=custom_llm_provider, ) else: return self.acompletion_function( @@ -537,7 +540,7 @@ class DatabricksChatCompletion(BaseLLM): logging_obj=logging_obj, ), model=model, - custom_llm_provider="vertex_ai_beta", + custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, ) else: diff --git a/litellm/llms/vertex_ai_partner.py b/litellm/llms/vertex_ai_partner.py index 66f8a1740..eb24c4d26 100644 --- a/litellm/llms/vertex_ai_partner.py +++ b/litellm/llms/vertex_ai_partner.py @@ -177,7 +177,6 @@ class VertexAIPartnerModels(BaseLLM): credentials=vertex_credentials, project_id=vertex_project ) - openai_chat_completions = OpenAIChatCompletion() openai_like_chat_completions = DatabricksChatCompletion() ## Load Config @@ -223,6 +222,7 @@ class VertexAIPartnerModels(BaseLLM): client=client, timeout=timeout, encoding=encoding, + custom_llm_provider="vertex_ai_beta", ) except Exception as e: diff --git a/litellm/main.py b/litellm/main.py index c88119df9..4abd44707 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1867,6 +1867,7 @@ def completion( custom_prompt_dict=custom_prompt_dict, client=client, # pass AsyncOpenAI, OpenAI client encoding=encoding, + custom_llm_provider="databricks", ) except Exception as e: ## LOGGING - log the original exception returned diff --git a/litellm/utils.py b/litellm/utils.py index 27b3f60c1..7df136846 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9271,11 +9271,20 @@ class CustomStreamWrapper: try: # return this for all models completion_obj = {"content": ""} - if self.custom_llm_provider and ( - self.custom_llm_provider == "anthropic" - or self.custom_llm_provider in litellm._custom_providers + from litellm.types.utils import GenericStreamingChunk as GChunk + + if ( + isinstance(chunk, dict) + and all( + key in chunk for key in GChunk.__annotations__ + ) # check if chunk is a generic streaming chunk + ) or ( + self.custom_llm_provider + and ( + self.custom_llm_provider == "anthropic" + or self.custom_llm_provider in litellm._custom_providers + ) ): - from litellm.types.utils import GenericStreamingChunk as GChunk if self.received_finish_reason is not None: raise StopIteration From f76cad210c449c0325c9623d37b1e9ab441d8311 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 15:37:28 -0700 Subject: [PATCH 121/655] fix(utils.py): support fireworks ai finetuned models Fixes https://github.com/BerriAI/litellm/issues/4923 --- litellm/tests/test_get_llm_provider.py | 9 +++++++++ litellm/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_get_llm_provider.py b/litellm/tests/test_get_llm_provider.py index 6f53b0f8f..d3b4302ac 100644 --- a/litellm/tests/test_get_llm_provider.py +++ b/litellm/tests/test_get_llm_provider.py @@ -25,6 +25,15 @@ def test_get_llm_provider(): # test_get_llm_provider() +def test_get_llm_provider_fireworks(): # tests finetuned fireworks models - https://github.com/BerriAI/litellm/issues/4923 + model, custom_llm_provider, _, _ = 
litellm.get_llm_provider( + model="fireworks_ai/accounts/my-test-1234" + ) + + assert custom_llm_provider == "fireworks_ai" + assert model == "accounts/my-test-1234" + + def test_get_llm_provider_catch_all(): _, response, _, _ = litellm.get_llm_provider(model="*") assert response == "openai" diff --git a/litellm/utils.py b/litellm/utils.py index 7df136846..ade88a3ae 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4473,7 +4473,7 @@ def get_llm_provider( dynamic_api_key = api_key or get_secret("DEEPSEEK_API_KEY") elif custom_llm_provider == "fireworks_ai": # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.fireworks.ai/inference/v1 - if not model.startswith("accounts/fireworks/models"): + if not model.startswith("accounts/"): model = f"accounts/fireworks/models/{model}" api_base = api_base or "https://api.fireworks.ai/inference/v1" dynamic_api_key = api_key or ( From d1989b6063cc392b4ae22d4d9d1168616f81feee Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 15:38:27 -0700 Subject: [PATCH 122/655] fix(utils.py): support fireworks ai finetuned models Fixes https://github.com/BerriAI/litellm/issues/4923 --- litellm/tests/test_get_llm_provider.py | 9 +++++++++ litellm/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_get_llm_provider.py b/litellm/tests/test_get_llm_provider.py index 6f53b0f8f..d3b4302ac 100644 --- a/litellm/tests/test_get_llm_provider.py +++ b/litellm/tests/test_get_llm_provider.py @@ -25,6 +25,15 @@ def test_get_llm_provider(): # test_get_llm_provider() +def test_get_llm_provider_fireworks(): # tests finetuned fireworks models - https://github.com/BerriAI/litellm/issues/4923 + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model="fireworks_ai/accounts/my-test-1234" + ) + + assert custom_llm_provider == "fireworks_ai" + assert model == "accounts/my-test-1234" + + def test_get_llm_provider_catch_all(): _, response, _, _ = litellm.get_llm_provider(model="*") assert response == "openai" diff --git a/litellm/utils.py b/litellm/utils.py index 780148059..4e3a4e60a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4463,7 +4463,7 @@ def get_llm_provider( dynamic_api_key = api_key or get_secret("DEEPSEEK_API_KEY") elif custom_llm_provider == "fireworks_ai": # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.fireworks.ai/inference/v1 - if not model.startswith("accounts/fireworks/models"): + if not model.startswith("accounts/"): model = f"accounts/fireworks/models/{model}" api_base = api_base or "https://api.fireworks.ai/inference/v1" dynamic_api_key = api_key or ( From 4ab8d2229d23f362418ee93b1daf7d86543f1efe Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 16:08:41 -0700 Subject: [PATCH 123/655] security - check max request size --- litellm/proxy/auth/auth_utils.py | 68 +++++++++++++++++++++++++ litellm/proxy/auth/user_api_key_auth.py | 28 +++++----- litellm/proxy/proxy_config.yaml | 5 ++ 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index bd1e50ed0..83c676518 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -1,5 +1,7 @@ import re +from fastapi import Request + from litellm._logging import verbose_proxy_logger from litellm.proxy._types import * @@ -75,3 +77,69 @@ def is_llm_api_route(route: str) -> bool: return True return False + 
+ +async def check_if_request_size_is_safe(request: Request) -> bool: + """ + Enterprise Only: + - Checks if the request size is within the limit + + Args: + request (Request): The incoming request. + + Returns: + bool: True if the request size is within the limit, False otherwise. + """ + from litellm.proxy.proxy_server import general_settings, premium_user + + max_request_size_mb = general_settings.get("max_request_size_mb", None) + if max_request_size_mb is not None: + # Check if premium user + if premium_user is not True: + verbose_proxy_logger.warning( + f"using max_request_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}" + ) + return True + + # Get the request body + content_length = request.headers.get("content-length") + + if content_length: + header_size = int(content_length) + header_size_mb = bytes_to_mb(bytes_value=header_size) + verbose_proxy_logger.debug( + f"content_length request size in MB={header_size_mb}" + ) + + if header_size_mb > max_request_size_mb: + raise ProxyException( + message=f"Request size is too large. Request size is {header_size_mb} MB. Max size is {max_request_size_mb} MB", + type=ProxyErrorTypes.bad_request_error.value, + code=400, + param="content-length", + ) + else: + # If Content-Length is not available, read the body + body = await request.body() + body_size = len(body) + request_size_mb = bytes_to_mb(bytes_value=body_size) + + verbose_proxy_logger.debug( + f"request body request size in MB={request_size_mb}" + ) + if request_size_mb > max_request_size_mb: + raise ProxyException( + message=f"Request size is too large. Request size is {request_size_mb} MB. Max size is {max_request_size_mb} MB", + type=ProxyErrorTypes.bad_request_error.value, + code=400, + param="content-length", + ) + + return True + + +def bytes_to_mb(bytes_value: int): + """ + Helper to convert bytes to MB + """ + return bytes_value / (1024 * 1024) diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index d91baf5ca..8a1f97f4c 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import ( log_to_opentelemetry, ) from litellm.proxy.auth.auth_utils import ( + check_if_request_size_is_safe, is_llm_api_route, route_in_additonal_public_routes, ) @@ -116,6 +117,21 @@ async def user_api_key_auth( try: route: str = request.url.path + ### LiteLLM Enterprise Security Checks + # Check 1. Check if request size is under max_request_size_mb + # Check 2. 
FILTER IP ADDRESS + await check_if_request_size_is_safe(request=request) + + is_valid_ip = _check_valid_ip( + allowed_ips=general_settings.get("allowed_ips", None), request=request + ) + + if not is_valid_ip: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access forbidden: IP address not allowed.", + ) + pass_through_endpoints: Optional[List[dict]] = general_settings.get( "pass_through_endpoints", None ) @@ -170,18 +186,6 @@ async def user_api_key_auth( ``` """ - ### FILTER IP ADDRESS ### - - is_valid_ip = _check_valid_ip( - allowed_ips=general_settings.get("allowed_ips", None), request=request - ) - - if not is_valid_ip: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Access forbidden: IP address not allowed.", - ) - if ( route in LiteLLMRoutes.public_routes.value or route_in_additonal_public_routes(current_route=route) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 4df510399..2fe8eb2a5 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -34,5 +34,10 @@ general_settings: "llm_requests_hanging": "https://hooks.slack.com/services/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", } + # Security controls + max_request_size_mb: 100 + # google cloud run maximum repsonses size is 32MB + max_response_size_mb: 100 + litellm_settings: success_callback: ["langfuse"] \ No newline at end of file From c5ee6fe4b9579c4668f1c4a81ad49fcc861c34fa Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 16:09:37 -0700 Subject: [PATCH 124/655] build: cookbook on migrating to litellm proxy from openai/azure sdk --- ...teLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb | 565 ++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb diff --git a/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb b/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb new file mode 100644 index 000000000..39677ed2a --- /dev/null +++ b/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb @@ -0,0 +1,565 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Migrating to LiteLLM Proxy from OpenAI/Azure OpenAI\n", + "\n", + "Covers:\n", + "\n", + "* /chat/completion\n", + "* /embedding\n", + "\n", + "\n", + "These are **selected examples**. LiteLLM Proxy is **OpenAI-Compatible**, it works with any project that calls OpenAI. Just change the `base_url`, `api_key` and `model`.\n", + "\n", + "For more examples, [go here](https://docs.litellm.ai/docs/proxy/user_keys)\n", + "\n", + "To pass provider-specific args, [go here](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)\n", + "\n", + "To drop unsupported params (E.g. 
frequency_penalty for bedrock with librechat), [go here](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)\n" + ], + "metadata": { + "id": "kccfk0mHZ4Ad" + } + }, + { + "cell_type": "markdown", + "source": [ + "## /chat/completion\n", + "\n" + ], + "metadata": { + "id": "nmSClzCPaGH6" + } + }, + { + "cell_type": "markdown", + "source": [ + "### OpenAI Python SDK" + ], + "metadata": { + "id": "_vqcjwOVaKpO" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x1e_Ok3KZzeP" + }, + "outputs": [], + "source": [ + "import openai\n", + "client = openai.OpenAI(\n", + " api_key=\"anything\",\n", + " base_url=\"http://0.0.0.0:4000\"\n", + ")\n", + "\n", + "# request sent to model set on litellm proxy, `litellm --model`\n", + "response = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo\",\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"this is a test request, write a short poem\"\n", + " }\n", + " ],\n", + " extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n", + " \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n", + " \"generation_name\": \"ishaan-generation-openai-client\",\n", + " \"generation_id\": \"openai-client-gen-id22\",\n", + " \"trace_id\": \"openai-client-trace-id22\",\n", + " \"trace_user_id\": \"openai-client-user-id2\"\n", + " }\n", + " }\n", + ")\n", + "\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Calling" + ], + "metadata": { + "id": "AqkyKk9Scxgj" + } + }, + { + "cell_type": "code", + "source": [ + "from openai import OpenAI\n", + "client = OpenAI(\n", + " api_key=\"sk-1234\", # [OPTIONAL] set if you set one on proxy, else set \"\"\n", + " base_url=\"http://0.0.0.0:4000\",\n", + ")\n", + "\n", + "tools = [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city and state, e.g. 
San Francisco, CA\",\n", + " },\n", + " \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n", + " },\n", + " \"required\": [\"location\"],\n", + " },\n", + " }\n", + " }\n", + "]\n", + "messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n", + "completion = client.chat.completions.create(\n", + " model=\"gpt-4o\", # use 'model_name' from config.yaml\n", + " messages=messages,\n", + " tools=tools,\n", + " tool_choice=\"auto\"\n", + ")\n", + "\n", + "print(completion)\n" + ], + "metadata": { + "id": "wDg10VqLczE1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Azure OpenAI Python SDK" + ], + "metadata": { + "id": "YYoxLloSaNWW" + } + }, + { + "cell_type": "code", + "source": [ + "import openai\n", + "client = openai.AzureOpenAI(\n", + " api_key=\"anything\",\n", + " base_url=\"http://0.0.0.0:4000\"\n", + ")\n", + "\n", + "# request sent to model set on litellm proxy, `litellm --model`\n", + "response = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo\",\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"this is a test request, write a short poem\"\n", + " }\n", + " ],\n", + " extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n", + " \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n", + " \"generation_name\": \"ishaan-generation-openai-client\",\n", + " \"generation_id\": \"openai-client-gen-id22\",\n", + " \"trace_id\": \"openai-client-trace-id22\",\n", + " \"trace_user_id\": \"openai-client-user-id2\"\n", + " }\n", + " }\n", + ")\n", + "\n", + "print(response)" + ], + "metadata": { + "id": "yA1XcgowaSRy" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Langchain Python" + ], + "metadata": { + "id": "yl9qhDvnaTpL" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import HumanMessage, SystemMessage\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"anything\"\n", + "\n", + "chat = ChatOpenAI(\n", + " openai_api_base=\"http://0.0.0.0:4000\",\n", + " model = \"gpt-3.5-turbo\",\n", + " temperature=0.1,\n", + " extra_body={\n", + " \"metadata\": {\n", + " \"generation_name\": \"ishaan-generation-langchain-client\",\n", + " \"generation_id\": \"langchain-client-gen-id22\",\n", + " \"trace_id\": \"langchain-client-trace-id22\",\n", + " \"trace_user_id\": \"langchain-client-user-id2\"\n", + " }\n", + " }\n", + ")\n", + "\n", + "messages = [\n", + " SystemMessage(\n", + " content=\"You are a helpful assistant that im using to make a test request to.\"\n", + " ),\n", + " HumanMessage(\n", + " content=\"test from litellm. 
tell me why it's amazing in 1 sentence\"\n", + " ),\n", + "]\n", + "response = chat(messages)\n", + "\n", + "print(response)" + ], + "metadata": { + "id": "5MUZgSquaW5t" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Curl" + ], + "metadata": { + "id": "B9eMgnULbRaz" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "```\n", + "curl -X POST 'http://0.0.0.0:4000/chat/completions' \\\n", + " -H 'Content-Type: application/json' \\\n", + " -d '{\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"what llm are you\"\n", + " }\n", + " ],\n", + " \"metadata\": {\n", + " \"generation_name\": \"ishaan-test-generation\",\n", + " \"generation_id\": \"gen-id22\",\n", + " \"trace_id\": \"trace-id22\",\n", + " \"trace_user_id\": \"user-id2\"\n", + " }\n", + "}'\n", + "```\n", + "\n" + ], + "metadata": { + "id": "VWCCk5PFcmhS" + } + }, + { + "cell_type": "markdown", + "source": [ + "### LlamaIndex" + ], + "metadata": { + "id": "drBAm2e1b6xe" + } + }, + { + "cell_type": "code", + "source": [ + "import os, dotenv\n", + "\n", + "from llama_index.llms import AzureOpenAI\n", + "from llama_index.embeddings import AzureOpenAIEmbedding\n", + "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", + "\n", + "llm = AzureOpenAI(\n", + " engine=\"azure-gpt-3.5\", # model_name on litellm proxy\n", + " temperature=0.0,\n", + " azure_endpoint=\"http://0.0.0.0:4000\", # litellm proxy endpoint\n", + " api_key=\"sk-1234\", # litellm proxy API Key\n", + " api_version=\"2023-07-01-preview\",\n", + ")\n", + "\n", + "embed_model = AzureOpenAIEmbedding(\n", + " deployment_name=\"azure-embedding-model\",\n", + " azure_endpoint=\"http://0.0.0.0:4000\",\n", + " api_key=\"sk-1234\",\n", + " api_version=\"2023-07-01-preview\",\n", + ")\n", + "\n", + "\n", + "documents = SimpleDirectoryReader(\"llama_index_data\").load_data()\n", + "service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n", + "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n", + "\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\"What did the author do growing up?\")\n", + "print(response)\n" + ], + "metadata": { + "id": "d0bZcv8fb9mL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Langchain JS" + ], + "metadata": { + "id": "xypvNdHnb-Yy" + } + }, + { + "cell_type": "code", + "source": [ + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "\n", + "const model = new ChatOpenAI({\n", + " modelName: \"gpt-4\",\n", + " openAIApiKey: \"sk-1234\",\n", + " modelKwargs: {\"metadata\": \"hello world\"} // 👈 PASS Additional params here\n", + "}, {\n", + " basePath: \"http://0.0.0.0:4000\",\n", + "});\n", + "\n", + "const message = await model.invoke(\"Hi there!\");\n", + "\n", + "console.log(message);\n" + ], + "metadata": { + "id": "R55mK2vCcBN2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### OpenAI JS" + ], + "metadata": { + "id": "nC4bLifCcCiW" + } + }, + { + "cell_type": "code", + "source": [ + "const { OpenAI } = require('openai');\n", + "\n", + "const openai = new OpenAI({\n", + " apiKey: \"sk-1234\", // This is the default and can be omitted\n", + " baseURL: \"http://0.0.0.0:4000\"\n", + "});\n", + "\n", + "async function main() {\n", + " const chatCompletion = await 
openai.chat.completions.create({\n", + " messages: [{ role: 'user', content: 'Say this is a test' }],\n", + " model: 'gpt-3.5-turbo',\n", + " }, {\"metadata\": {\n", + " \"generation_name\": \"ishaan-generation-openaijs-client\",\n", + " \"generation_id\": \"openaijs-client-gen-id22\",\n", + " \"trace_id\": \"openaijs-client-trace-id22\",\n", + " \"trace_user_id\": \"openaijs-client-user-id2\"\n", + " }});\n", + "}\n", + "\n", + "main();\n" + ], + "metadata": { + "id": "MICH8kIMcFpg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Anthropic SDK" + ], + "metadata": { + "id": "D1Q07pEAcGTb" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from anthropic import Anthropic\n", + "\n", + "client = Anthropic(\n", + " base_url=\"http://localhost:4000\", # proxy endpoint\n", + " api_key=\"sk-s4xN1IiLTCytwtZFJaYQrA\", # litellm proxy virtual key\n", + ")\n", + "\n", + "message = client.messages.create(\n", + " max_tokens=1024,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Hello, Claude\",\n", + " }\n", + " ],\n", + " model=\"claude-3-opus-20240229\",\n", + ")\n", + "print(message.content)" + ], + "metadata": { + "id": "qBjFcAvgcI3t" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## /embeddings" + ], + "metadata": { + "id": "dFAR4AJGcONI" + } + }, + { + "cell_type": "markdown", + "source": [ + "### OpenAI Python SDK" + ], + "metadata": { + "id": "lgNoM281cRzR" + } + }, + { + "cell_type": "code", + "source": [ + "import openai\n", + "from openai import OpenAI\n", + "\n", + "# set base_url to your proxy server\n", + "# set api_key to send to proxy server\n", + "client = OpenAI(api_key=\"\", base_url=\"http://0.0.0.0:4000\")\n", + "\n", + "response = client.embeddings.create(\n", + " input=[\"hello from litellm\"],\n", + " model=\"text-embedding-ada-002\"\n", + ")\n", + "\n", + "print(response)\n" + ], + "metadata": { + "id": "NY3DJhPfcQhA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Langchain Embeddings" + ], + "metadata": { + "id": "hmbg-DW6cUZs" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain.embeddings import OpenAIEmbeddings\n", + "\n", + "embeddings = OpenAIEmbeddings(model=\"sagemaker-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n", + "\n", + "\n", + "text = \"This is a test document.\"\n", + "\n", + "query_result = embeddings.embed_query(text)\n", + "\n", + "print(f\"SAGEMAKER EMBEDDINGS\")\n", + "print(query_result[:5])\n", + "\n", + "embeddings = OpenAIEmbeddings(model=\"bedrock-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n", + "\n", + "text = \"This is a test document.\"\n", + "\n", + "query_result = embeddings.embed_query(text)\n", + "\n", + "print(f\"BEDROCK EMBEDDINGS\")\n", + "print(query_result[:5])\n", + "\n", + "embeddings = OpenAIEmbeddings(model=\"bedrock-titan-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n", + "\n", + "text = \"This is a test document.\"\n", + "\n", + "query_result = embeddings.embed_query(text)\n", + "\n", + "print(f\"TITAN EMBEDDINGS\")\n", + "print(query_result[:5])" + ], + "metadata": { + "id": "lX2S8Nl1cWVP" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Curl Request" + ], + "metadata": { + "id": "oqGbWBCQcYfd" + } + }, + { + "cell_type": 
"markdown", + "source": [ + "\n", + "\n", + "```curl\n", + "curl -X POST 'http://0.0.0.0:4000/embeddings' \\\n", + " -H 'Content-Type: application/json' \\\n", + " -d ' {\n", + " \"model\": \"text-embedding-ada-002\",\n", + " \"input\": [\"write a litellm poem\"]\n", + " }'\n", + "```\n", + "\n" + ], + "metadata": { + "id": "7rkIMV9LcdwQ" + } + } + ] +} \ No newline at end of file From 15d488c25c6032c497158991ca82da7e87c32064 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 16:25:57 -0700 Subject: [PATCH 125/655] docs set max_request_size --- docs/my-website/docs/proxy/enterprise.md | 51 +++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index 3607cb07f..ceeb91501 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -21,7 +21,7 @@ Features: - ✅ IP address‑based access control lists - ✅ Track Request IP Address - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints) - - ✅ Set Max Request / File Size on Requests + - ✅ [Set Max Request Size / File Size on Requests](#set-max-request--response-size-on-litellm-proxy) - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests) - **Enterprise Spend Tracking Features** - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags) @@ -1288,3 +1288,52 @@ How it works? **Note:** Setting an environment variable within a Python script using os.environ will not make that variable accessible via SSH sessions or any other new processes that are started independently of the Python script. Environment variables set this way only affect the current process and its child processes. + +## Set Max Request / Response Size on LiteLLM Proxy + +Use this if you want to set a maximum request / response size for your proxy server. If a request size is above the size it gets rejected + slack alert triggered + +#### Usage +**Step 1.** Set `max_request_size_mb` and `max_response_size_mb` + +For this example we set a very low limit on `max_request_size_mb` and expect it to get rejected + +:::info +In production we recommend setting a `max_request_size_mb` / `max_response_size_mb` around `32 MB` + +::: + +```yaml +model_list: + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ +general_settings: + master_key: sk-1234 + + # Security controls + max_request_size_mb: 0.000000001 # 👈 Key Change - Max Request Size in MB. Set this very low for testing + max_response_size_mb: 100 # 👈 Key Change - Max Response Size in MB +``` + +**Step 2.** Test it with `/chat/completions` request + +```shell +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "fake-openai-endpoint", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + +**Expected Response from request** +We expect this to fail since the request size is over `max_request_size_mb` +```shell +{"error":{"message":"Request size is too large. Request size is 0.0001125335693359375 MB. 
Max size is 1e-09 MB","type":"bad_request_error","param":"content-length","code":400}} +``` From a7785c624bcd4e93ce177cc5f66050fdab48a36e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 16:29:06 -0700 Subject: [PATCH 126/655] docs(user_keys.md): improve openai migration docs --- docs/my-website/docs/proxy/user_keys.md | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 7417ef6bd..30bb28b64 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -1,7 +1,39 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 💡 Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl +# 💡 Migrating from OpenAI (Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl) + +LiteLLM Proxy is **OpenAI-Compatible**, and supports: +* /chat/completions +* /embeddings +* /completions +* /image/generations +* /moderations +* /audio/transcriptions +* /audio/speech +* [Assistants API endpoints](https://docs.litellm.ai/docs/assistants) +* [Batches API endpoints](https://docs.litellm.ai/docs/batches) + +LiteLLM Proxy is **Azure OpenAI-compatible**: +* /chat/completions +* /completions +* /embeddings + +LiteLLM Proxy is **Anthropic-compatible**: +* /messages + +This doc covers: + +* /chat/completion +* /embedding + + +These are **selected examples**. LiteLLM Proxy is **OpenAI-Compatible**, it works with any project that calls OpenAI. Just change the `base_url`, `api_key` and `model`. + +To pass provider-specific args, [go here](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage) + +To drop unsupported params (E.g. frequency_penalty for bedrock with librechat), [go here](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage) + :::info From 6f9c29d39b537fcd4fe42f50f7c5184d92302b2a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 16:51:45 -0700 Subject: [PATCH 127/655] fix(databricks.py): handle DONE chunk from databricks --- litellm/llms/databricks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 1a276f52c..363b222fe 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -795,6 +795,8 @@ class ModelResponseIterator: try: chunk = chunk.replace("data:", "") chunk = chunk.strip() + if chunk == "[DONE]": + raise StopAsyncIteration if len(chunk) > 0: json_chunk = json.loads(chunk) return self.chunk_parser(chunk=json_chunk) From 41ca6fd52a22fbd8a9355381a96e49b3656bb631 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 16:53:00 -0700 Subject: [PATCH 128/655] feat - check max response size --- litellm/proxy/auth/auth_utils.py | 47 +++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index 83c676518..f9be71c35 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -1,4 +1,5 @@ import re +import sys from fastapi import Request @@ -88,7 +89,11 @@ async def check_if_request_size_is_safe(request: Request) -> bool: request (Request): The incoming request. Returns: - bool: True if the request size is within the limit, False otherwise. 
+ bool: True if the request size is within the limit + + Raises: + ProxyException: If the request size is too large + """ from litellm.proxy.proxy_server import general_settings, premium_user @@ -138,6 +143,46 @@ async def check_if_request_size_is_safe(request: Request) -> bool: return True +async def check_response_size_is_safe(response: Any) -> bool: + """ + Enterprise Only: + - Checks if the response size is within the limit + + Args: + response (Any): The response to check. + + Returns: + bool: True if the response size is within the limit + + Raises: + ProxyException: If the response size is too large + + """ + + from litellm.proxy.proxy_server import general_settings, premium_user + + max_response_size_mb = general_settings.get("max_response_size_mb", None) + if max_response_size_mb is not None: + # Check if premium user + if premium_user is not True: + verbose_proxy_logger.warning( + f"using max_response_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}" + ) + return True + + response_size_mb = bytes_to_mb(bytes_value=sys.getsizeof(response)) + verbose_proxy_logger.debug(f"response size in MB={response_size_mb}") + if response_size_mb > max_response_size_mb: + raise ProxyException( + message=f"Response size is too large. Response size is {response_size_mb} MB. Max size is {max_response_size_mb} MB", + type=ProxyErrorTypes.bad_request_error.value, + code=400, + param="content-length", + ) + + return True + + def bytes_to_mb(bytes_value: int): """ Helper to convert bytes to MB From b2f72338f6db820383134db75b1c1781ceb01253 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 16:53:39 -0700 Subject: [PATCH 129/655] feat check check_response_size_is_safe --- litellm/proxy/proxy_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 274e28ac2..46f67b610 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -123,6 +123,7 @@ from litellm.proxy.auth.auth_checks import ( get_user_object, log_to_opentelemetry, ) +from litellm.proxy.auth.auth_utils import check_response_size_is_safe from litellm.proxy.auth.handle_jwt import JWTHandler from litellm.proxy.auth.litellm_license import LicenseCheck from litellm.proxy.auth.model_checks import ( @@ -3000,6 +3001,7 @@ async def chat_completion( **additional_headers, ) ) + await check_response_size_is_safe(response=response) return response except RejectedRequestError as e: @@ -3241,7 +3243,7 @@ async def completion( response_cost=response_cost, ) ) - + await check_response_size_is_safe(response=response) return response except RejectedRequestError as e: _data = e.request_data @@ -3491,6 +3493,7 @@ async def embeddings( call_id=litellm_call_id, ) ) + await check_response_size_is_safe(response=response) return response except Exception as e: From f633f7d92d30337ca2b63686e139cfdf86d3a000 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 16:54:31 -0700 Subject: [PATCH 130/655] set max_response_size_mb --- litellm/proxy/proxy_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 2fe8eb2a5..e47450b6a 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -37,7 +37,7 @@ general_settings: # Security controls max_request_size_mb: 100 # google cloud run maximum repsonses size is 32MB - max_response_size_mb: 100 + max_response_size_mb: 10 
litellm_settings: success_callback: ["langfuse"] \ No newline at end of file From 3511aadf991dfdc0ab4a13aed9a3f9043e64e439 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 17:00:39 -0700 Subject: [PATCH 131/655] allow setting max request / response size on admin UI --- litellm/proxy/_types.py | 8 ++++++++ litellm/proxy/proxy_server.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 25aa942e5..d3f1bc844 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -1144,6 +1144,14 @@ class ConfigGeneralSettings(LiteLLMBase): global_max_parallel_requests: Optional[int] = Field( None, description="global max parallel requests to allow for a proxy instance." ) + max_request_size_mb: Optional[int] = Field( + None, + description="max request size in MB, if a request is larger than this size it will be rejected", + ) + max_response_size_mb: Optional[int] = Field( + None, + description="max response size in MB, if a response is larger than this size it will be rejected", + ) infer_model_from_keys: Optional[bool] = Field( None, description="for `/models` endpoint, infers available model based on environment keys (e.g. OPENAI_API_KEY)", diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 46f67b610..33f021210 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -9083,6 +9083,8 @@ async def get_config_list( allowed_args = { "max_parallel_requests": {"type": "Integer"}, "global_max_parallel_requests": {"type": "Integer"}, + "max_request_size_mb": {"type": "Integer"}, + "max_response_size_mb": {"type": "Integer"}, } return_val = [] From 0627468455d0e5fd3090ff95fefe8b099b854204 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 27 Jul 2024 20:21:39 -0700 Subject: [PATCH 132/655] fix checking mode on health checks --- litellm/main.py | 4 ++-- litellm/model_prices_and_context_window_backup.json | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 672029f69..134617ba0 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -4797,12 +4797,12 @@ async def ahealth_check( raise Exception("model not set") if model in litellm.model_cost and mode is None: - mode = litellm.model_cost[model]["mode"] + mode = litellm.model_cost[model].get("mode") model, custom_llm_provider, _, _ = get_llm_provider(model=model) if model in litellm.model_cost and mode is None: - mode = litellm.model_cost[model]["mode"] + mode = litellm.model_cost[model].get("mode") mode = mode or "chat" # default to chat completion calls diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 0f20f6689..a6d66750c 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -4373,7 +4373,7 @@ "litellm_provider": "perplexity", "mode": "chat" }, - "fireworks_ai/firefunction-v2": { + "fireworks_ai/accounts/fireworks/models/firefunction-v2": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -4384,7 +4384,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/mixtral-8x22b-instruct-hf": { + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, @@ -4395,7 +4395,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - 
"fireworks_ai/qwen2-72b-instruct": { + "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, @@ -4406,7 +4406,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/yi-large": { + "fireworks_ai/accounts/fireworks/models/yi-large": { "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, @@ -4417,7 +4417,7 @@ "supports_function_calling": true, "source": "https://fireworks.ai/pricing" }, - "fireworks_ai/deepseek-coder-v2-instruct": { + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 8192, From 6d5aedc48d5d5fd24f7328cb4b77b638c409f234 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 20:22:35 -0700 Subject: [PATCH 133/655] feat(databricks.py): support vertex mistral cost tracking --- litellm/cost_calculator.py | 4 +- litellm/llms/databricks.py | 101 +++--------------- .../tests/test_amazing_vertex_completion.py | 2 + litellm/tests/test_completion_cost.py | 36 +++++++ 4 files changed, 56 insertions(+), 87 deletions(-) diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 9bd5f90be..b680fc8a5 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -509,7 +509,7 @@ def completion_cost( ): model = completion_response._hidden_params.get("model", model) custom_llm_provider = completion_response._hidden_params.get( - "custom_llm_provider", "" + "custom_llm_provider", custom_llm_provider or "" ) region_name = completion_response._hidden_params.get( "region_name", region_name @@ -732,7 +732,7 @@ def response_cost_calculator( ) return response_cost except litellm.NotFoundError as e: - print_verbose( + verbose_logger.debug( # debug since it can be spammy in logs, for calls f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map." 
) return None diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 363b222fe..0567e6e05 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -259,87 +259,6 @@ class DatabricksChatCompletion(BaseLLM): api_base = "{}/embeddings".format(api_base) return api_base, headers - def process_response( - self, - model: str, - response: Union[requests.Response, httpx.Response], - model_response: ModelResponse, - stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, - optional_params: dict, - api_key: str, - data: Union[dict, str], - messages: List, - print_verbose, - encoding, - ) -> ModelResponse: - ## LOGGING - logging_obj.post_call( - input=messages, - api_key=api_key, - original_response=response.text, - additional_args={"complete_input_dict": data}, - ) - print_verbose(f"raw model_response: {response.text}") - ## RESPONSE OBJECT - try: - completion_response = response.json() - except: - raise DatabricksError( - message=response.text, status_code=response.status_code - ) - if "error" in completion_response: - raise DatabricksError( - message=str(completion_response["error"]), - status_code=response.status_code, - ) - else: - text_content = "" - tool_calls = [] - for content in completion_response["content"]: - if content["type"] == "text": - text_content += content["text"] - ## TOOL CALLING - elif content["type"] == "tool_use": - tool_calls.append( - { - "id": content["id"], - "type": "function", - "function": { - "name": content["name"], - "arguments": json.dumps(content["input"]), - }, - } - ) - - _message = litellm.Message( - tool_calls=tool_calls, - content=text_content or None, - ) - model_response.choices[0].message = _message # type: ignore - model_response._hidden_params["original_response"] = completion_response[ - "content" - ] # allow user to access raw anthropic tool calling response - - model_response.choices[0].finish_reason = map_finish_reason( - completion_response["stop_reason"] - ) - - ## CALCULATING USAGE - prompt_tokens = completion_response["usage"]["input_tokens"] - completion_tokens = completion_response["usage"]["output_tokens"] - total_tokens = prompt_tokens + completion_tokens - - model_response.created = int(time.time()) - model_response.model = model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - ) - setattr(model_response, "usage", usage) # type: ignore - return model_response - async def acompletion_stream_function( self, model: str, @@ -392,6 +311,7 @@ class DatabricksChatCompletion(BaseLLM): logging_obj, stream, data: dict, + base_model: Optional[str], optional_params: dict, litellm_params=None, logger_fn=None, @@ -420,7 +340,11 @@ class DatabricksChatCompletion(BaseLLM): except Exception as e: raise DatabricksError(status_code=500, message=str(e)) - return ModelResponse(**response_json) + response = ModelResponse(**response_json) + + if base_model is not None: + response._hidden_params["model"] = base_model + return response def completion( self, @@ -443,6 +367,7 @@ class DatabricksChatCompletion(BaseLLM): client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): custom_endpoint: Optional[bool] = optional_params.pop("custom_endpoint", None) + base_model: Optional[str] = optional_params.pop("base_model", None) api_base, headers = self._validate_environment( api_base=api_base, api_key=api_key, @@ -476,11 +401,11 @@ class DatabricksChatCompletion(BaseLLM): "headers": headers, }, ) - if acompletion == True: + if 
acompletion is True: if client is not None and isinstance(client, HTTPHandler): client = None if ( - stream is not None and stream == True + stream is not None and stream is True ): # if function call - fake the streaming (need complete blocks for output parsing in openai format) print_verbose("makes async anthropic streaming POST request") data["stream"] = stream @@ -521,6 +446,7 @@ class DatabricksChatCompletion(BaseLLM): logger_fn=logger_fn, headers=headers, timeout=timeout, + base_model=base_model, ) else: if client is None or not isinstance(client, HTTPHandler): @@ -562,7 +488,12 @@ class DatabricksChatCompletion(BaseLLM): except Exception as e: raise DatabricksError(status_code=500, message=str(e)) - return ModelResponse(**response_json) + response = ModelResponse(**response_json) + + if base_model is not None: + response._hidden_params["model"] = base_model + + return response async def aembedding( self, diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 5419c25ff..e2d35c972 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -939,6 +939,8 @@ async def test_partner_models_httpx(model, sync_mode): response_format_tests(response=response) print(f"response: {response}") + + assert response._hidden_params["response_cost"] > 0 except litellm.RateLimitError as e: pass except Exception as e: diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index 41448bd56..53cbaa31d 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -918,6 +918,42 @@ def test_vertex_ai_llama_predict_cost(): assert predictive_cost == 0 +def test_vertex_ai_mistral_predict_cost(): + from litellm.types.utils import Choices, Message, ModelResponse, Usage + + response_object = ModelResponse( + id="26c0ef045020429d9c5c9b078c01e564", + choices=[ + Choices( + finish_reason="stop", + index=0, + message=Message( + content="Hello! I'm Litellm Bot, your helpful assistant. While I can't provide real-time weather updates, I can help you find a reliable weather service or guide you on how to check the weather on your device. 
Would you like assistance with that?", + role="assistant", + tool_calls=None, + function_call=None, + ), + ) + ], + created=1722124652, + model="vertex_ai/mistral-large", + object="chat.completion", + system_fingerprint=None, + usage=Usage(prompt_tokens=32, completion_tokens=55, total_tokens=87), + ) + model = "mistral-large@2407" + messages = [{"role": "user", "content": "Hey, hows it going???"}] + custom_llm_provider = "vertex_ai" + predictive_cost = completion_cost( + completion_response=response_object, + model=model, + messages=messages, + custom_llm_provider=custom_llm_provider, + ) + + assert predictive_cost > 0 + + @pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"]) def test_completion_cost_tts(model): os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" From e0f3cd580cb85066f7d36241a03c30aa50a8a31d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 22:30:04 -0700 Subject: [PATCH 134/655] fix(factory.py): support mistral ai prefix:true in messages Fixes https://github.com/BerriAI/litellm/issues/4882 --- litellm/llms/prompt_templates/factory.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index efe812497..2218fa568 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -235,6 +235,12 @@ def mistral_api_pt(messages): """ new_messages = [] for m in messages: + special_keys = ["role", "content", "tool_calls"] + extra_args = {} + if isinstance(m, dict): + for k, v in m.items(): + if k not in special_keys: + extra_args[k] = v texts = "" if isinstance(m["content"], list): for c in m["content"]: @@ -244,7 +250,8 @@ def mistral_api_pt(messages): texts += c["text"] elif isinstance(m["content"], str): texts = m["content"] - new_m = {"role": m["role"], "content": texts} + + new_m = {"role": m["role"], "content": texts, **extra_args} if new_m["role"] == "tool" and m.get("name"): new_m["name"] = m["name"] From 21eea287230af8cc7f4a45a348f7a4e9b1323a2d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 27 Jul 2024 22:44:15 -0700 Subject: [PATCH 135/655] docs(vertex.md): add mistral api to docs --- docs/my-website/docs/providers/vertex.md | 81 +++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index f87597046..4b95ff2a8 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -775,7 +775,6 @@ vertex_ai_location = "your-vertex-location" # can also set this as os.environ["V response = completion( model="vertex_ai/" + model, messages=[{"role": "user", "content": "hi"}], - temperature=0.7, vertex_ai_project=vertex_ai_project, vertex_ai_location=vertex_ai_location, ) @@ -828,6 +827,86 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
+## Mistral API + +[**Supported OpenAI Params**](https://github.com/BerriAI/litellm/blob/e0f3cd580cb85066f7d36241a03c30aa50a8a31d/litellm/llms/openai.py#L137) + +| Model Name | Function Call | +|------------------|--------------------------------------| +| meta/llama3-405b-instruct-maas | `completion('vertex_ai/mistral-large@2407', messages)` | + +### Usage + + + + +```python +from litellm import completion +import os + +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "" + +model = "mistral-large@2407" + +vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"] +vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"] + +response = completion( + model="vertex_ai/" + model, + messages=[{"role": "user", "content": "hi"}], + vertex_ai_project=vertex_ai_project, + vertex_ai_location=vertex_ai_location, +) +print("\nModel Response", response) +``` + + + +**1. Add to config** + +```yaml +model_list: + - model_name: anthropic-mistral + litellm_params: + model: vertex_ai/mistral-large@2407 + vertex_ai_project: "my-test-project" + vertex_ai_location: "us-east-1" + - model_name: anthropic-mistral + litellm_params: + model: vertex_ai/mistral-large@2407 + vertex_ai_project: "my-test-project" + vertex_ai_location: "us-west-1" +``` + +**2. Start proxy** + +```bash +litellm --config /path/to/config.yaml + +# RUNNING at http://0.0.0.0:4000 +``` + +**3. Test it!** + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "anthropic-mistral", # 👈 the 'model_name' in config + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ], + }' +``` + + + + + ## Model Garden | Model Name | Function Call | |------------------|--------------------------------------| From 92b539b42a3936fd534e008ae1e1852dd27ed96c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 07:51:44 -0700 Subject: [PATCH 136/655] fix(auth_checks.py): handle writing team object to redis caching correctly --- litellm/proxy/_new_secret_config.yaml | 4 ++++ litellm/proxy/auth/auth_checks.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index b712afaf0..d13fb3f37 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -3,3 +3,7 @@ model_list: litellm_params: model: "*" +litellm_settings: + cache: true + cache_params: + type: redis \ No newline at end of file diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index 7c5356a37..75f5dd108 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -373,12 +373,13 @@ async def _cache_team_object( proxy_logging_obj: Optional[ProxyLogging], ): key = "team_id:{}".format(team_id) - await user_api_key_cache.async_set_cache(key=key, value=team_table) + value = team_table.model_dump_json(exclude_unset=True) + await user_api_key_cache.async_set_cache(key=key, value=value) ## UPDATE REDIS CACHE ## if proxy_logging_obj is not None: await proxy_logging_obj.internal_usage_cache.async_set_cache( - key=key, value=team_table + key=key, value=value ) From b2fcf65653edc2a962d09dc893f458e65371fb73 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 08:00:28 -0700 Subject: [PATCH 137/655] log file_size_in_mb in metadata --- 
litellm/proxy/common_utils/http_parsing_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py index 8db1e8794..7164385a7 100644 --- a/litellm/proxy/common_utils/http_parsing_utils.py +++ b/litellm/proxy/common_utils/http_parsing_utils.py @@ -56,6 +56,9 @@ def check_file_size_under_limit( file_contents_size = file.size or 0 file_content_size_in_mb = file_contents_size / (1024 * 1024) + if "metadata" not in request_data: + request_data["metadata"] = {} + request_data["metadata"]["file_size_in_mb"] = file_content_size_in_mb max_file_size_mb = None if llm_router is not None and request_data["model"] in router_model_names: From 95f063f978d7bd30ab793a3a520e720883b054a6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 08:03:08 -0700 Subject: [PATCH 138/655] fix default input/output values for /audio/trancription logging --- litellm/integrations/langfuse.py | 4 ++++ litellm/proxy/proxy_config.yaml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index 0217f7458..73a8ed0d9 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -144,6 +144,10 @@ class LangFuseLogger: f"Langfuse Logging - Enters logging function for model {kwargs}" ) + # set default values for input/output for langfuse logging + input = None + output = None + litellm_params = kwargs.get("litellm_params", {}) litellm_call_id = kwargs.get("litellm_call_id", None) metadata = ( diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 7a8bd9535..f7e5a894f 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -34,4 +34,4 @@ general_settings: max_response_size_mb: 10 litellm_settings: - callbacks: ["otel"] \ No newline at end of file + success_callback: ["langfuse"] \ No newline at end of file From ec28e8e630e4b454a86a95bfe55e2882fd7992bb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 08:17:19 -0700 Subject: [PATCH 139/655] test - logging litellm-atranscription --- litellm/tests/test_alangfuse.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index 8f91ffa67..097974e2f 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -245,6 +245,48 @@ async def test_langfuse_logging_without_request_response(stream, langfuse_client pytest.fail(f"An exception occurred - {e}") +# Get the current directory of the file being run +pwd = os.path.dirname(os.path.realpath(__file__)) +print(pwd) + +file_path = os.path.join(pwd, "gettysburg.wav") + +audio_file = open(file_path, "rb") + + +@pytest.mark.asyncio +async def test_langfuse_logging_audio_transcriptions(langfuse_client): + """ + Test that creates a trace with masked input and output + """ + import uuid + + _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}" + litellm.set_verbose = True + litellm.success_callback = ["langfuse"] + await litellm.atranscription( + model="whisper-1", + file=audio_file, + metadata={ + "trace_id": _unique_trace_name, + }, + ) + + langfuse_client.flush() + await asyncio.sleep(2) + + # get trace with _unique_trace_name + trace = langfuse_client.get_trace(id=_unique_trace_name) + generations = list( + reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data) + ) + + print("generations for given trace=", 
generations) + + assert len(generations) == 1 + assert generations[0].name == "litellm-atranscription" + + @pytest.mark.asyncio async def test_langfuse_masked_input_output(langfuse_client): """ From 31445ab20a288d1545e460447de265ee17d51704 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 08:19:30 -0700 Subject: [PATCH 140/655] fix(caching.py): support /completion caching by default updates supported call types in redis cache to cover text_completion caching --- litellm/caching.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/litellm/caching.py b/litellm/caching.py index 557a029d3..fa10095da 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -1692,6 +1692,8 @@ class Cache: "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ] ] ] = [ @@ -1701,6 +1703,8 @@ class Cache: "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ], # s3 Bucket, boto3 configuration s3_bucket_name: Optional[str] = None, @@ -2235,6 +2239,8 @@ def enable_cache( "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ] ] ] = [ @@ -2244,6 +2250,8 @@ def enable_cache( "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ], **kwargs, ): @@ -2300,6 +2308,8 @@ def update_cache( "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ] ] ] = [ @@ -2309,6 +2319,8 @@ def update_cache( "aembedding", "atranscription", "transcription", + "atext_completion", + "text_completion", ], **kwargs, ): From 285925e10a2147ed8d55434c6fd238c25f2a6734 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 08:21:22 -0700 Subject: [PATCH 141/655] log output from /audio on langfuse --- litellm/integrations/langfuse.py | 5 +++++ litellm/tests/test_alangfuse.py | 1 + 2 files changed, 6 insertions(+) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index 73a8ed0d9..f0e3faec7 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -202,6 +202,11 @@ class LangFuseLogger: ): input = prompt output = response_obj["data"] + elif response_obj is not None and isinstance( + response_obj, litellm.TranscriptionResponse + ): + input = prompt + output = response_obj["text"] print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}") trace_id = None generation_id = None diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index 097974e2f..bf475ae97 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -285,6 +285,7 @@ async def test_langfuse_logging_audio_transcriptions(langfuse_client): assert len(generations) == 1 assert generations[0].name == "litellm-atranscription" + assert generations[0].output is not None @pytest.mark.asyncio From 52b1ca292cd911d7a43f1bee26ea1e573662c96b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 09:01:09 -0700 Subject: [PATCH 142/655] fix(utils.py): check if tools is iterable before indexing into it Fixes https://github.com/BerriAI/litellm/issues/4933 --- litellm/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index ec1370c30..411d2509d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2772,7 +2772,9 @@ def get_optional_params( message=f"Function calling is not supported by {custom_llm_provider}.", ) - if "tools" in non_default_params: + if "tools" in 
non_default_params and isinstance( + non_default_params, list + ): # fixes https://github.com/BerriAI/litellm/issues/4933 tools = non_default_params["tools"] for ( tool From 66dbd938e8eec794b0f649c0ed8b8a59cbe8f5e4 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 12:01:54 -0700 Subject: [PATCH 143/655] fix(exceptions.py): use correct status code for content policy exceptions Fixes https://github.com/BerriAI/litellm/issues/4941#issuecomment-2256578732 --- litellm/exceptions.py | 59 ++++++++++++--------------- litellm/main.py | 10 +++++ litellm/proxy/_new_secret_config.yaml | 8 ++-- 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/litellm/exceptions.py b/litellm/exceptions.py index d2337b7f4..f55c0914d 100644 --- a/litellm/exceptions.py +++ b/litellm/exceptions.py @@ -122,7 +122,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore self.model = model self.llm_provider = llm_provider self.litellm_debug_info = litellm_debug_info - response = response or httpx.Response( + response = httpx.Response( status_code=self.status_code, request=httpx.Request( method="GET", url="https://litellm.ai" @@ -287,16 +287,13 @@ class RateLimitError(openai.RateLimitError): # type: ignore self.litellm_debug_info = litellm_debug_info self.max_retries = max_retries self.num_retries = num_retries - if response is None: - self.response = httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ) - else: - self.response = response + self.response = httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ) super().__init__( self.message, response=self.response, body=None ) # Call the base class constructor with the parameters it needs @@ -334,7 +331,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore self.llm_provider = llm_provider self.litellm_debug_info = litellm_debug_info request = httpx.Request(method="POST", url="https://api.openai.com/v1") - self.response = response or httpx.Response(status_code=400, request=request) + self.response = httpx.Response(status_code=400, request=request) super().__init__( message=self.message, model=self.model, # type: ignore @@ -377,7 +374,7 @@ class RejectedRequestError(BadRequestError): # type: ignore self.litellm_debug_info = litellm_debug_info self.request_data = request_data request = httpx.Request(method="POST", url="https://api.openai.com/v1") - response = httpx.Response(status_code=500, request=request) + response = httpx.Response(status_code=400, request=request) super().__init__( message=self.message, model=self.model, # type: ignore @@ -419,7 +416,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore self.llm_provider = llm_provider self.litellm_debug_info = litellm_debug_info request = httpx.Request(method="POST", url="https://api.openai.com/v1") - self.response = response or httpx.Response(status_code=500, request=request) + self.response = httpx.Response(status_code=400, request=request) super().__init__( message=self.message, model=self.model, # type: ignore @@ -463,16 +460,13 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore self.litellm_debug_info = litellm_debug_info self.max_retries = max_retries self.num_retries = num_retries - if response is None: - self.response = httpx.Response( - status_code=self.status_code, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ) - 
else: - self.response = response + self.response = httpx.Response( + status_code=self.status_code, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ) super().__init__( self.message, response=self.response, body=None ) # Call the base class constructor with the parameters it needs @@ -512,16 +506,13 @@ class InternalServerError(openai.InternalServerError): # type: ignore self.litellm_debug_info = litellm_debug_info self.max_retries = max_retries self.num_retries = num_retries - if response is None: - self.response = httpx.Response( - status_code=self.status_code, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ) - else: - self.response = response + self.response = httpx.Response( + status_code=self.status_code, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ) super().__init__( self.message, response=self.response, body=None ) # Call the base class constructor with the parameters it needs diff --git a/litellm/main.py b/litellm/main.py index 4a0d1251e..fc65ae73c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -500,6 +500,16 @@ def mock_completion( llm_provider=getattr(mock_response, "llm_provider", custom_llm_provider or "openai"), # type: ignore model=model, ) + elif isinstance(mock_response, str) and mock_response.startswith( + "Exception: content_filter_policy" + ): + raise litellm.MockException( + status_code=400, + message=mock_response, + llm_provider="azure", + model=model, # type: ignore + request=httpx.Request(method="POST", url="https://api.openai.com/v1/"), + ) time_delay = kwargs.get("mock_delay", None) if time_delay is not None: time.sleep(time_delay) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index d13fb3f37..c31c9873a 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -3,7 +3,7 @@ model_list: litellm_params: model: "*" -litellm_settings: - cache: true - cache_params: - type: redis \ No newline at end of file +# litellm_settings: +# cache: true +# cache_params: +# type: redis \ No newline at end of file From 452441ae032c208cbbba6e22c9d54e42e3861812 Mon Sep 17 00:00:00 2001 From: David Leen Date: Mon, 29 Jul 2024 10:44:10 -0700 Subject: [PATCH 144/655] Fix: #4942. Remove verbose logging when exception can be handled --- litellm/main.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 4a0d1251e..c073d1912 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -413,11 +413,6 @@ async def acompletion( ) # sets the logging event loop if the user does sync streaming (e.g. 
on proxy for sagemaker calls) return response except Exception as e: - verbose_logger.error( - "litellm.acompletion(): Exception occured - {}\n{}".format( - str(e), traceback.format_exc() - ) - ) verbose_logger.debug(traceback.format_exc()) custom_llm_provider = custom_llm_provider or "openai" raise exception_type( From 59384c84a547eea0d4c8471531dc05aa05e06b33 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 12:28:12 -0700 Subject: [PATCH 145/655] fix(utils.py): correctly re-raise azure api connection error ' --- .pre-commit-config.yaml | 12 ++++++------ litellm/exceptions.py | 5 +++-- litellm/utils.py | 8 ++++++++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a33473b72..d429bc6b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ repos: - repo: local hooks: - - id: mypy - name: mypy - entry: python3 -m mypy --ignore-missing-imports - language: system - types: [python] - files: ^litellm/ + # - id: mypy + # name: mypy + # entry: python3 -m mypy --ignore-missing-imports + # language: system + # types: [python] + # files: ^litellm/ - id: isort name: isort entry: isort diff --git a/litellm/exceptions.py b/litellm/exceptions.py index f55c0914d..04558e437 100644 --- a/litellm/exceptions.py +++ b/litellm/exceptions.py @@ -582,7 +582,7 @@ class APIConnectionError(openai.APIConnectionError): # type: ignore message, llm_provider, model, - request: httpx.Request, + request: Optional[httpx.Request] = None, litellm_debug_info: Optional[str] = None, max_retries: Optional[int] = None, num_retries: Optional[int] = None, @@ -592,9 +592,10 @@ class APIConnectionError(openai.APIConnectionError): # type: ignore self.model = model self.status_code = 500 self.litellm_debug_info = litellm_debug_info + self.request = httpx.Request(method="POST", url="https://api.openai.com/v1") self.max_retries = max_retries self.num_retries = num_retries - super().__init__(message=self.message, request=request) + super().__init__(message=self.message, request=self.request) def __str__(self): _message = self.message diff --git a/litellm/utils.py b/litellm/utils.py index 411d2509d..ddbd039fe 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7866,6 +7866,14 @@ def exception_type( model=model, litellm_debug_info=extra_information, ) + elif "Connection error" in error_str: + exception_mapping_worked = True + raise APIConnectionError( + message=f"{exception_provider} APIConnectionError - {message}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True if original_exception.status_code == 400: From dd2d61bfce98c321b0ac0de7126fc498dd7570f8 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 12:29:56 -0700 Subject: [PATCH 146/655] build(pre-commit.yaml): update --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d429bc6b8..a33473b72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ repos: - repo: local hooks: - # - id: mypy - # name: mypy - # entry: python3 -m mypy --ignore-missing-imports - # language: system - # types: [python] - # files: ^litellm/ + - id: mypy + name: mypy + entry: python3 -m mypy --ignore-missing-imports + language: system + types: [python] + files: ^litellm/ - id: isort name: isort entry: isort From 
ae4bcd8a41ab8930e40c186810c93cd28960856a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 13:04:41 -0700 Subject: [PATCH 147/655] fix(utils.py): fix trim_messages to handle tool calling Fixes https://github.com/BerriAI/litellm/issues/4931 --- .pre-commit-config.yaml | 12 +++---- litellm/tests/test_utils.py | 65 +++++++++++++++++++++++++++++++++++++ litellm/types/utils.py | 9 ++++- litellm/utils.py | 25 +++++++++++--- 4 files changed, 100 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a33473b72..d429bc6b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ repos: - repo: local hooks: - - id: mypy - name: mypy - entry: python3 -m mypy --ignore-missing-imports - language: system - types: [python] - files: ^litellm/ + # - id: mypy + # name: mypy + # entry: python3 -m mypy --ignore-missing-imports + # language: system + # types: [python] + # files: ^litellm/ - id: isort name: isort entry: isort diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py index db2d9ab5e..976ded7f6 100644 --- a/litellm/tests/test_utils.py +++ b/litellm/tests/test_utils.py @@ -173,6 +173,71 @@ def test_trimming_with_system_message_exceeding_max_tokens(): assert len(trimmed_messages) == 1 +def test_trimming_with_tool_calls(): + from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message + + messages = [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris?", + }, + Message( + content=None, + role="assistant", + tool_calls=[ + ChatCompletionMessageToolCall( + function=Function( + arguments='{"location": "San Francisco, CA", "unit": "celsius"}', + name="get_current_weather", + ), + id="call_G11shFcS024xEKjiAOSt6Tc9", + type="function", + ), + ChatCompletionMessageToolCall( + function=Function( + arguments='{"location": "Tokyo, Japan", "unit": "celsius"}', + name="get_current_weather", + ), + id="call_e0ss43Bg7H8Z9KGdMGWyZ9Mj", + type="function", + ), + ChatCompletionMessageToolCall( + function=Function( + arguments='{"location": "Paris, France", "unit": "celsius"}', + name="get_current_weather", + ), + id="call_nRjLXkWTJU2a4l9PZAf5as6g", + type="function", + ), + ], + function_call=None, + ), + { + "tool_call_id": "call_G11shFcS024xEKjiAOSt6Tc9", + "role": "tool", + "name": "get_current_weather", + "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}', + }, + { + "tool_call_id": "call_e0ss43Bg7H8Z9KGdMGWyZ9Mj", + "role": "tool", + "name": "get_current_weather", + "content": '{"location": "Tokyo", "temperature": "10", "unit": "celsius"}', + }, + { + "tool_call_id": "call_nRjLXkWTJU2a4l9PZAf5as6g", + "role": "tool", + "name": "get_current_weather", + "content": '{"location": "Paris", "temperature": "22", "unit": "celsius"}', + }, + ] + result = trim_messages(messages=messages, max_tokens=1, return_response_tokens=True) + + print(result) + + assert len(result[0]) == 3 # final 3 messages are tool calls + + def test_trimming_should_not_change_original_messages(): messages = [ {"role": "system", "content": "This is a short system message"}, diff --git a/litellm/types/utils.py b/litellm/types/utils.py index e64099aa6..3f7b16a2a 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -312,7 +312,14 @@ class Message(OpenAIObject): FunctionCall(**function_call) if function_call is not None else None ), "tool_calls": ( - [ChatCompletionMessageToolCall(**tool_call) for tool_call in tool_calls] + [ + ( + 
ChatCompletionMessageToolCall(**tool_call) + if isinstance(tool_call, dict) + else tool_call + ) + for tool_call in tool_calls + ] if tool_calls is not None else None ), diff --git a/litellm/utils.py b/litellm/utils.py index ddbd039fe..2518ed056 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -10658,7 +10658,7 @@ def get_token_count(messages, model): return token_counter(model=model, messages=messages) -def shorten_message_to_fit_limit(message, tokens_needed, model): +def shorten_message_to_fit_limit(message, tokens_needed, model: Optional[str]): """ Shorten a message to fit within a token limit by removing characters from the middle. """ @@ -10666,7 +10666,7 @@ def shorten_message_to_fit_limit(message, tokens_needed, model): # For OpenAI models, even blank messages cost 7 token, # and if the buffer is less than 3, the while loop will never end, # hence the value 10. - if "gpt" in model and tokens_needed <= 10: + if model is not None and "gpt" in model and tokens_needed <= 10: return message content = message["content"] @@ -10720,7 +10720,6 @@ def trim_messages( # if users pass in max tokens, trim to this amount messages = copy.deepcopy(messages) try: - print_verbose(f"trimming messages") if max_tokens is None: # Check if model is valid if model in litellm.model_cost: @@ -10740,6 +10739,17 @@ def trim_messages( system_message += "\n" if system_message else "" system_message += message["content"] + ## Handle Tool Call ## - check if last message is a tool response, return as is - https://github.com/BerriAI/litellm/issues/4931 + tool_messages = [] + + for message in reversed(messages): + if message["role"] != "tool": + break + tool_messages.append(message) + # # Remove the collected tool messages from the original list + if len(tool_messages): + messages = messages[: -len(tool_messages)] + current_tokens = token_counter(model=model, messages=messages) print_verbose(f"Current tokens: {current_tokens}, max tokens: {max_tokens}") @@ -10771,6 +10781,9 @@ def trim_messages( if system_message: final_messages = [system_message_event] + final_messages + if len(tool_messages) > 0: + final_messages.extend(tool_messages) + if ( return_response_tokens ): # if user wants token count with new trimmed messages @@ -10778,7 +10791,11 @@ def trim_messages( return final_messages, response_tokens return final_messages except Exception as e: # [NON-Blocking, if error occurs just return final_messages - print_verbose(f"Got exception while token trimming{e}") + verbose_logger.error( + "Got exception while token trimming - {}\n{}".format( + str(e), traceback.format_exc() + ) + ) return messages From 3eaa1fa217be9b6f176e46a2957a51ab408f1d5c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 13:14:53 -0700 Subject: [PATCH 148/655] types add GuardrailConfigBlock --- litellm/types/llms/bedrock.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py index 95ebc9742..cf0a4a84b 100644 --- a/litellm/types/llms/bedrock.py +++ b/litellm/types/llms/bedrock.py @@ -1,16 +1,18 @@ -from typing import TypedDict, Any, Union, Optional, Literal, List import json -from .openai import ChatCompletionToolCallChunk +from typing import Any, List, Literal, Optional, TypedDict, Union + from typing_extensions import ( - Self, Protocol, - TypeGuard, - override, - get_origin, - runtime_checkable, Required, + Self, + TypeGuard, + get_origin, + override, + runtime_checkable, ) +from .openai import ChatCompletionToolCallChunk + 
class SystemContentBlock(TypedDict): text: str @@ -108,6 +110,12 @@ class ToolConfigBlock(TypedDict, total=False): toolChoice: Union[str, ToolChoiceValuesBlock] +class GuardrailConfigBlock(TypedDict, total=False): + guardrailIdentifier: str + guardrailVersion: str + trace: Literal["enabled", "disabled"] + + class InferenceConfig(TypedDict, total=False): maxTokens: int stopSequences: List[str] @@ -144,6 +152,7 @@ class RequestObject(TypedDict, total=False): messages: Required[List[MessageBlock]] system: List[SystemContentBlock] toolConfig: ToolConfigBlock + guardrailConfig: Optional[GuardrailConfigBlock] class GenericStreamingChunk(TypedDict): From 5154ec51a312d894678de6033d9c6f65d0d9b906 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 14:10:09 -0700 Subject: [PATCH 149/655] feat - support guardrailConfig --- litellm/llms/bedrock_httpx.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 644d850a8..a6cf32c77 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -1889,12 +1889,14 @@ class BedrockConverseLLM(BaseLLM): additional_request_params = {} supported_converse_params = AmazonConverseConfig.__annotations__.keys() supported_tool_call_params = ["tools", "tool_choice"] + supported_guardrail_params = ["guardrailConfig"] ## TRANSFORMATION ## # send all model-specific params in 'additional_request_params' for k, v in inference_params.items(): if ( k not in supported_converse_params and k not in supported_tool_call_params + and k not in supported_guardrail_params ): additional_request_params[k] = v additional_request_keys.append(k) @@ -1926,6 +1928,15 @@ class BedrockConverseLLM(BaseLLM): "system": system_content_blocks, "inferenceConfig": InferenceConfig(**inference_params), } + + # Guardrail Config + guardrail_config: Optional[GuardrailConfigBlock] = None + request_guardrails_config = inference_params.pop("guardrailConfig", None) + if request_guardrails_config is not None: + guardrail_config = GuardrailConfigBlock(**request_guardrails_config) + _data["guardrailConfig"] = guardrail_config + + # Tool Config if bedrock_tool_config is not None: _data["toolConfig"] = bedrock_tool_config data = json.dumps(_data) From 46555ab78b3fdfe15edcb271bc51379edfba9584 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 14:13:08 -0700 Subject: [PATCH 150/655] test - bedrock guardrailConfig --- litellm/tests/test_bedrock_completion.py | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index 220e3f62f..498cfb5d8 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -81,6 +81,39 @@ def test_completion_bedrock_claude_completion_auth(): # test_completion_bedrock_claude_completion_auth() +def test_completion_bedrock_guardrails(): + import os + + litellm.set_verbose = True + + try: + response = completion( + model="anthropic.claude-v2", + messages=[ + { + "content": "where do i buy coffee from? ", + "role": "user", + } + ], + max_tokens=10, + guardrailConfig={ + "guardrailIdentifier": "ff6ujrregl1q", + "guardrailVersion": "DRAFT", + "trace": "disabled", + }, + ) + # Add any assertions here to check the response + print(response) + assert ( + "Sorry, the model cannot answer this question. 
coffee guardrail applied" + in response.choices[0].message.content + ) + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_completion_bedrock_claude_2_1_completion_auth(): print("calling bedrock claude 2.1 completion params auth") import os From 7cac1b0dce394cc07c674e51d3be7bdacbc62717 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 14:18:42 -0700 Subject: [PATCH 151/655] docs - Bedrock Guardrails --- docs/my-website/docs/providers/bedrock.md | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 2b2b8ca81..08a4d4862 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -360,6 +360,37 @@ resp = litellm.completion( print(f"\nResponse: {resp}") ``` + +## Usage - Bedrock Guardrails + +Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html) + +```python +from litellm import completion + +# set env +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + +response = completion( + model="anthropic.claude-v2", + messages=[ + { + "content": "where do i buy coffee from? ", + "role": "user", + } + ], + max_tokens=10, + guardrailConfig={ + "guardrailIdentifier": "ff6ujrregl1q", # The identifier (ID) for the guardrail. + "guardrailVersion": "DRAFT", # The version of the guardrail. + "trace": "disabled", # The trace behavior for the guardrail. Can either be "disabled" or "enabled" + }, +) +``` + + ## Usage - "Assistant Pre-fill" If you're using Anthropic's Claude with Bedrock, you can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array. From 21963e0bbe788af05533f872fcae3727ca1ae63a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 14:22:02 -0700 Subject: [PATCH 152/655] proxy server --- docs/my-website/docs/providers/bedrock.md | 34 +++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 08a4d4862..1ba679555 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -365,6 +365,9 @@ print(f"\nResponse: {resp}") Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html) + + + ```python from litellm import completion @@ -389,7 +392,38 @@ response = completion( }, ) ``` + + +```python + +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create(model="anthropic.claude-v2", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +], +temperature=0.7, +extra_body={ + guardrailConfig={ + "guardrailIdentifier": "ff6ujrregl1q", # The identifier (ID) for the guardrail. + "guardrailVersion": "DRAFT", # The version of the guardrail. + "trace": "disabled", # The trace behavior for the guardrail. 
Can either be "disabled" or "enabled" + }, +} +) + +print(response) +``` + + ## Usage - "Assistant Pre-fill" From 1ee8051c7c4ec0964b78c57147cefb10f4d2face Mon Sep 17 00:00:00 2001 From: pat-cohere <145371747+pat-cohere@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:23:04 -0400 Subject: [PATCH 153/655] Update cohere_chat.py Add chat history to cohere chat call that was previously being dropped. --- litellm/llms/cohere_chat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/llms/cohere_chat.py b/litellm/llms/cohere_chat.py index 830c924bd..bfa5ebf78 100644 --- a/litellm/llms/cohere_chat.py +++ b/litellm/llms/cohere_chat.py @@ -235,6 +235,7 @@ def completion( optional_params["message"] = most_recent_message data = { "model": model, + "chat_history": = chat_histrory, **optional_params, } From ebca13b2ca03a0431f7083d0b65c2a0324ad9d75 Mon Sep 17 00:00:00 2001 From: David Leen Date: Mon, 29 Jul 2024 14:23:56 -0700 Subject: [PATCH 154/655] fixes: #4947 Bedrock context exception does not have a response --- litellm/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index 411d2509d..cbe3d8408 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6845,7 +6845,6 @@ def exception_type( message=f"BedrockException: Context Window Error - {error_str}", model=model, llm_provider="bedrock", - response=original_exception.response, ) elif "Malformed input request" in error_str: exception_mapping_worked = True From 972df1f303f4f18a80435c91cadd3cbe674f6cef Mon Sep 17 00:00:00 2001 From: pat-cohere <145371747+pat-cohere@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:26:07 -0400 Subject: [PATCH 155/655] Update cohere_chat.py --- litellm/llms/cohere_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/cohere_chat.py b/litellm/llms/cohere_chat.py index bfa5ebf78..1b4e0d798 100644 --- a/litellm/llms/cohere_chat.py +++ b/litellm/llms/cohere_chat.py @@ -235,7 +235,7 @@ def completion( optional_params["message"] = most_recent_message data = { "model": model, - "chat_history": = chat_histrory, + "chat_history": chat_histrory, **optional_params, } From 3ce0cac56d0d004f2ee30ef5b484df2da9881fe5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 14:26:46 -0700 Subject: [PATCH 156/655] docs guardrailConfig --- docs/my-website/docs/providers/bedrock.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 1ba679555..485dbf892 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -412,7 +412,7 @@ response = client.chat.completions.create(model="anthropic.claude-v2", messages ], temperature=0.7, extra_body={ - guardrailConfig={ + "guardrailConfig": { "guardrailIdentifier": "ff6ujrregl1q", # The identifier (ID) for the guardrail. "guardrailVersion": "DRAFT", # The version of the guardrail. "trace": "disabled", # The trace behavior for the guardrail. Can either be "disabled" or "enabled" From 9cb3235d98cbbe9e56cb76ccb60196d778d04954 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:41:45 +0000 Subject: [PATCH 157/655] build(deps): bump fast-xml-parser in /docs/my-website Bumps [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) from 4.3.2 to 4.4.1. 
- [Release notes](https://github.com/NaturalIntelligence/fast-xml-parser/releases) - [Changelog](https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/CHANGELOG.md) - [Commits](https://github.com/NaturalIntelligence/fast-xml-parser/compare/v4.3.2...v4.4.1) --- updated-dependencies: - dependency-name: fast-xml-parser dependency-type: indirect ... Signed-off-by: dependabot[bot] --- docs/my-website/package-lock.json | 603 +++++++++++++----------------- docs/my-website/yarn.lock | 16 +- 2 files changed, 273 insertions(+), 346 deletions(-) diff --git a/docs/my-website/package-lock.json b/docs/my-website/package-lock.json index cc3923787..54939a98a 100644 --- a/docs/my-website/package-lock.json +++ b/docs/my-website/package-lock.json @@ -12,10 +12,11 @@ "@docusaurus/plugin-google-gtag": "^2.4.1", "@docusaurus/plugin-ideal-image": "^2.4.1", "@docusaurus/preset-classic": "2.4.1", + "@getcanary/docusaurus-pagefind": "^0.0.12", + "@getcanary/web": "^0.0.55", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", "docusaurus": "^1.14.7", - "docusaurus-lunr-search": "^2.4.1", "prism-react-renderer": "^1.3.5", "react": "^18.1.0", "react-dom": "^18.1.0", @@ -3689,6 +3690,66 @@ "react-waypoint": ">=9.0.2" } }, + "node_modules/@floating-ui/core": { + "version": "1.6.5", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.5.tgz", + "integrity": "sha512-8GrTWmoFhm5BsMZOTHeGD2/0FLKLQQHvO/ZmQga4tKempYRLz8aqJGqXVuQgisnMObq2YZ2SgkwctN1LOOxcqA==", + "dependencies": { + "@floating-ui/utils": "^0.2.5" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.6.8", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.8.tgz", + "integrity": "sha512-kx62rP19VZ767Q653wsP1XZCGIirkE09E0QUGNYTM/ttbbQHqcGPdSfWFxUyyNLc/W6aoJRBajOSXhP6GXjC0Q==", + "dependencies": { + "@floating-ui/core": "^1.6.0", + "@floating-ui/utils": "^0.2.5" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.5.tgz", + "integrity": "sha512-sTcG+QZ6fdEUObICavU+aB3Mp8HY4n14wYHdxK4fXjPmv3PXZZeY5RaguJmGyeH/CJQhX3fqKUtS4qc1LoHwhQ==" + }, + "node_modules/@getcanary/docusaurus-pagefind": { + "version": "0.0.12", + "resolved": "https://registry.npmjs.org/@getcanary/docusaurus-pagefind/-/docusaurus-pagefind-0.0.12.tgz", + "integrity": "sha512-F0OQ0Lb/GltewDEr0w+BgPbNyYpzAQZ/TtuG5rbtC3PnrOL+9pDMe/Gs0kE8AuY1uEd/YQOKr61rbY/k7kkFig==", + "dependencies": { + "cli-progress": "^3.12.0", + "micromatch": "^4.0.7", + "pagefind": "^1.1.0" + }, + "peerDependencies": { + "@docusaurus/core": "^2.0.0 || ^3.0.0", + "@getcanary/web": "*", + "react": "^17 || ^18", + "react-dom": "^17 || ^18" + } + }, + "node_modules/@getcanary/web": { + "version": "0.0.55", + "resolved": "https://registry.npmjs.org/@getcanary/web/-/web-0.0.55.tgz", + "integrity": "sha512-DjIhTMeuLZaHT+/h+O6Keg9Gb58frPURpM4lkKrN/wmRMoCnOuly3oXIH2X37YhAoHXi4udDRJ60mtD0UZy0uw==", + "dependencies": { + "@floating-ui/dom": "^1.6.8", + "@lit-labs/observers": "^2.0.2", + "@lit/context": "^1.1.2", + "@lit/task": "^1.0.1", + "highlight.js": "^11.10.0", + "lit": "^3.1.4", + "marked": "^13.0.2" + } + }, + "node_modules/@getcanary/web/node_modules/highlight.js": { + "version": "11.10.0", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.10.0.tgz", + "integrity": "sha512-SYVnVFswQER+zu1laSya563s+F8VDGt7o35d4utbamowvUNLLMovFqwCLSocpZTz3MgaSRA1IbqRWZv97dtErQ==", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/@hapi/hoek": { "version": "9.3.0", "resolved": 
"https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz", @@ -3786,6 +3847,43 @@ "resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.4.tgz", "integrity": "sha512-Hcv+nVC0kZnQ3tD9GVu5xSMR4VVYOteQIr/hwFPVEvPdlXqgGEuRjiheChHgdM+JyqdgNcmzZOX/tnl0JOiI7A==" }, + "node_modules/@lit-labs/observers": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@lit-labs/observers/-/observers-2.0.2.tgz", + "integrity": "sha512-eZb5+W9Cb0e/Y5m1DNxBSGTvGB2TAVTGMnTxL/IzFhPQEcZIAHewW1eVBhN8W07A5tirRaAmmF6fGL1V20p3gQ==", + "dependencies": { + "@lit/reactive-element": "^1.0.0 || ^2.0.0" + } + }, + "node_modules/@lit-labs/ssr-dom-shim": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@lit-labs/ssr-dom-shim/-/ssr-dom-shim-1.2.0.tgz", + "integrity": "sha512-yWJKmpGE6lUURKAaIltoPIE/wrbY3TEkqQt+X0m+7fQNnAv0keydnYvbiJFP1PnMhizmIWRWOG5KLhYyc/xl+g==" + }, + "node_modules/@lit/context": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@lit/context/-/context-1.1.2.tgz", + "integrity": "sha512-S0nw2C6Tkm7fVX5TGYqeROGD+Z9Coa2iFpW+ysYBDH3YvCqOY3wVQvSgwbaliLJkjTnSEYCBe9qFqKV8WUFpVw==", + "dependencies": { + "@lit/reactive-element": "^1.6.2 || ^2.0.0" + } + }, + "node_modules/@lit/reactive-element": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@lit/reactive-element/-/reactive-element-2.0.4.tgz", + "integrity": "sha512-GFn91inaUa2oHLak8awSIigYz0cU0Payr1rcFsrkf5OJ5eSPxElyZfKh0f2p9FsTiZWXQdWGJeXZICEfXXYSXQ==", + "dependencies": { + "@lit-labs/ssr-dom-shim": "^1.2.0" + } + }, + "node_modules/@lit/task": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@lit/task/-/task-1.0.1.tgz", + "integrity": "sha512-fVLDtmwCau8NywnFIXaJxsCZjzaIxnVq+cFRKYC1Y4tA4/0rMTvF6DLZZ2JE51BwzOluaKtgJX8x1QDsQtAaIw==", + "dependencies": { + "@lit/reactive-element": "^1.0.0 || ^2.0.0" + } + }, "node_modules/@mdx-js/mdx": { "version": "1.6.22", "resolved": "https://registry.npmjs.org/@mdx-js/mdx/-/mdx-1.6.22.tgz", @@ -3965,6 +4063,66 @@ "node": ">= 8" } }, + "node_modules/@pagefind/darwin-arm64": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@pagefind/darwin-arm64/-/darwin-arm64-1.1.0.tgz", + "integrity": "sha512-SLsXNLtSilGZjvqis8sX42fBWsWAVkcDh1oerxwqbac84HbiwxpxOC2jm8hRwcR0Z55HPZPWO77XeRix/8GwTg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@pagefind/darwin-x64": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@pagefind/darwin-x64/-/darwin-x64-1.1.0.tgz", + "integrity": "sha512-QjQSE/L5oS1C8N8GdljGaWtjCBMgMtfrPAoiCmINTu9Y9dp0ggAyXvF8K7Qg3VyIMYJ6v8vg2PN7Z3b+AaAqUA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@pagefind/linux-arm64": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@pagefind/linux-arm64/-/linux-arm64-1.1.0.tgz", + "integrity": "sha512-8zjYCa2BtNEL7KnXtysPtBELCyv5DSQ4yHeK/nsEq6w4ToAMTBl0K06khqxdSGgjMSwwrxvLzq3so0LC5Q14dA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@pagefind/linux-x64": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@pagefind/linux-x64/-/linux-x64-1.1.0.tgz", + "integrity": "sha512-4lsg6VB7A6PWTwaP8oSmXV4O9H0IHX7AlwTDcfyT+YJo/sPXOVjqycD5cdBgqNLfUk8B9bkWcTDCRmJbHrKeCw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@pagefind/windows-x64": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@pagefind/windows-x64/-/windows-x64-1.1.0.tgz", + 
"integrity": "sha512-OboCM76BcMKT9IoSfZuFhiqMRgTde8x4qDDvKulFmycgiJrlL5WnIqBHJLQxZq+o2KyZpoHF97iwsGAm8c32sQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@polka/url": { "version": "1.0.0-next.21", "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz", @@ -4560,6 +4718,11 @@ "@types/node": "*" } }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==" + }, "node_modules/@types/unist": { "version": "2.0.7", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz", @@ -4727,11 +4890,6 @@ "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz", "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==" }, - "node_modules/abbrev": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", - "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==" - }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -4997,11 +5155,6 @@ "node": ">= 8" } }, - "node_modules/aproba": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", - "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==" - }, "node_modules/arch": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/arch/-/arch-2.2.0.tgz", @@ -5279,14 +5432,6 @@ "node": ">= 4.5.0" } }, - "node_modules/autocomplete.js": { - "version": "0.37.1", - "resolved": "https://registry.npmjs.org/autocomplete.js/-/autocomplete.js-0.37.1.tgz", - "integrity": "sha512-PgSe9fHYhZEsm/9jggbjtVsGXJkPLvd+9mC7gZJ662vVL5CRWEtm/mIrrzCx0MrNxHVwxD5d00UOn6NsmL2LUQ==", - "dependencies": { - "immediate": "^3.2.3" - } - }, "node_modules/autolinker": { "version": "3.16.2", "resolved": "https://registry.npmjs.org/autolinker/-/autolinker-3.16.2.tgz", @@ -5572,15 +5717,6 @@ "resolved": "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz", "integrity": "sha512-x+VAiMRL6UPkx+kudNvxTl6hB2XNNCG2r+7wixVfIYwu/2HKRXimwQyaumLjMveWvT2Hkd/cAJw+QBMfJ/EKVw==" }, - "node_modules/bcp-47-match": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/bcp-47-match/-/bcp-47-match-1.0.3.tgz", - "integrity": "sha512-LggQ4YTdjWQSKELZF5JwchnBa1u0pIQSZf5lSdOHEdbVP55h0qICA/FUp3+W99q0xqxYa1ZQizTUH87gecII5w==", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, "node_modules/bcrypt-pbkdf": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", @@ -6370,11 +6506,11 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -6932,6 +7068,35 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/cli-progress": { + 
"version": "3.12.0", + "resolved": "https://registry.npmjs.org/cli-progress/-/cli-progress-3.12.0.tgz", + "integrity": "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A==", + "dependencies": { + "string-width": "^4.2.3" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/cli-progress/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/cli-progress/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/cli-table3": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/cli-table3/-/cli-table3-0.6.3.tgz", @@ -7144,14 +7309,6 @@ "simple-swizzle": "^0.2.2" } }, - "node_modules/color-support": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", - "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==", - "bin": { - "color-support": "bin.js" - } - }, "node_modules/colord": { "version": "2.9.3", "resolved": "https://registry.npmjs.org/colord/-/colord-2.9.3.tgz", @@ -7362,11 +7519,6 @@ "resolved": "https://registry.npmjs.org/consola/-/consola-2.15.3.tgz", "integrity": "sha512-9vAdYbHj6x2fLKC4+oPH0kFzY/orMZyG2Aj+kNylHxKGJ/Ed4dpNyAQYwJOdqO4zdM7XpVHmyejQDcQHrnuXbw==" }, - "node_modules/console-control-strings": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", - "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==" - }, "node_modules/console-stream": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/console-stream/-/console-stream-0.1.1.tgz", @@ -7699,11 +7851,6 @@ "resolved": "https://registry.npmjs.org/css-select-base-adapter/-/css-select-base-adapter-0.1.1.tgz", "integrity": "sha512-jQVeeRG70QI08vSTwf1jHxp74JoZsr2XSgETae8/xC8ovSnL2WF87GTLO86Sbwdt2lK4Umg4HnnwMO4YF3Ce7w==" }, - "node_modules/css-selector-parser": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/css-selector-parser/-/css-selector-parser-1.4.1.tgz", - "integrity": "sha512-HYPSb7y/Z7BNDCOrakL4raGO2zltZkbeXyAd6Tg9obzix6QhzxCotdBl6VT0Dv4vZfJGVz3WL/xaEI9Ly3ul0g==" - }, "node_modules/css-tree": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-1.1.3.tgz", @@ -8418,18 +8565,6 @@ "node": ">=8" } }, - "node_modules/direction": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/direction/-/direction-1.0.4.tgz", - "integrity": "sha512-GYqKi1aH7PJXxdhTeZBFrg8vUBeKXi+cNprXsC1kpJcbcVnV9wBsrOu1cQEdG0WeQwlfHiy3XvnKfIrJ2R0NzQ==", - "bin": { - "direction": "cli.js" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, "node_modules/discontinuous-range": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/discontinuous-range/-/discontinuous-range-1.0.0.tgz", @@ -8515,35 +8650,6 @@ "docusaurus-write-translations": "lib/write-translations.js" } }, - 
"node_modules/docusaurus-lunr-search": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/docusaurus-lunr-search/-/docusaurus-lunr-search-2.4.1.tgz", - "integrity": "sha512-UOgaAypgO0iLyA1Hk4EThG/ofLm9/JldznzN98ZKr7TMYVjMZbAEaIBKLAUDFdfOPr9D5EswXdLn39/aRkwHMA==", - "dependencies": { - "autocomplete.js": "^0.37.0", - "clsx": "^1.2.1", - "gauge": "^3.0.0", - "hast-util-select": "^4.0.0", - "hast-util-to-text": "^2.0.0", - "hogan.js": "^3.0.2", - "lunr": "^2.3.8", - "lunr-languages": "^1.4.0", - "minimatch": "^3.0.4", - "object-assign": "^4.1.1", - "rehype-parse": "^7.0.1", - "to-vfile": "^6.1.0", - "unified": "^9.0.0", - "unist-util-is": "^4.0.2" - }, - "engines": { - "node": ">= 8.10.0" - }, - "peerDependencies": { - "@docusaurus/core": "^2.0.0-alpha.60 || ^2.0.0", - "react": "^16.8.4 || ^17", - "react-dom": "^16.8.4 || ^17" - } - }, "node_modules/docusaurus/node_modules/@babel/code-frame": { "version": "7.10.4", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz", @@ -11219,9 +11325,9 @@ } }, "node_modules/fast-xml-parser": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.3.2.tgz", - "integrity": "sha512-rmrXUXwbJedoXkStenj1kkljNF7ugn5ZjR9FJcwmCfcCbtOMDghPajbc+Tck6vE6F5XsDmx+Pr2le9fw8+pXBg==", + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz", + "integrity": "sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==", "funding": [ { "type": "github", @@ -11398,9 +11504,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dependencies": { "to-regex-range": "^5.0.1" }, @@ -11815,43 +11921,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/gauge": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", - "integrity": "sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==", - "dependencies": { - "aproba": "^1.0.3 || ^2.0.0", - "color-support": "^1.1.2", - "console-control-strings": "^1.0.0", - "has-unicode": "^2.0.1", - "object-assign": "^4.1.1", - "signal-exit": "^3.0.0", - "string-width": "^4.2.3", - "strip-ansi": "^6.0.1", - "wide-align": "^1.1.2" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/gauge/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "node_modules/gauge/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/gaze": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/gaze/-/gaze-1.1.3.tgz", @@ -12486,11 
+12555,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/has-unicode": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", - "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==" - }, "node_modules/has-value": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/has-value/-/has-value-1.0.0.tgz", @@ -12608,24 +12672,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/hast-util-has-property": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/hast-util-has-property/-/hast-util-has-property-1.0.4.tgz", - "integrity": "sha512-ghHup2voGfgFoHMGnaLHOjbYFACKrRh9KFttdCzMCbFoBMJXiNi2+XTrPP8+q6cDJM/RSqlCfVWrjp1H201rZg==", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-is-element": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-1.1.0.tgz", - "integrity": "sha512-oUmNua0bFbdrD/ELDSSEadRVtWZOf3iF6Lbv81naqsIV99RnSCieTbWuWCY8BAeEfKJTKl0gRdokv+dELutHGQ==", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/hast-util-parse-selector": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz", @@ -12661,31 +12707,6 @@ "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" }, - "node_modules/hast-util-select": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/hast-util-select/-/hast-util-select-4.0.2.tgz", - "integrity": "sha512-8EEG2//bN5rrzboPWD2HdS3ugLijNioS1pqOTIolXNf67xxShYw4SQEmVXd3imiBG+U2bC2nVTySr/iRAA7Cjg==", - "dependencies": { - "bcp-47-match": "^1.0.0", - "comma-separated-tokens": "^1.0.0", - "css-selector-parser": "^1.0.0", - "direction": "^1.0.0", - "hast-util-has-property": "^1.0.0", - "hast-util-is-element": "^1.0.0", - "hast-util-to-string": "^1.0.0", - "hast-util-whitespace": "^1.0.0", - "not": "^0.1.0", - "nth-check": "^2.0.0", - "property-information": "^5.0.0", - "space-separated-tokens": "^1.0.0", - "unist-util-visit": "^2.0.0", - "zwitch": "^1.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/hast-util-to-parse5": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/hast-util-to-parse5/-/hast-util-to-parse5-6.0.0.tgz", @@ -12702,38 +12723,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/hast-util-to-string": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/hast-util-to-string/-/hast-util-to-string-1.0.4.tgz", - "integrity": "sha512-eK0MxRX47AV2eZ+Lyr18DCpQgodvaS3fAQO2+b9Two9F5HEoRPhiUMNzoXArMJfZi2yieFzUBMRl3HNJ3Jus3w==", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-text": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-2.0.1.tgz", - "integrity": "sha512-8nsgCARfs6VkwH2jJU9b8LNTuR4700na+0h3PqCaEk4MAnMDeu5P0tP8mjk9LLNGxIeQRLbiDbZVw6rku+pYsQ==", - "dependencies": { - "hast-util-is-element": "^1.0.0", - "repeat-string": "^1.0.0", - "unist-util-find-after": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/hast-util-whitespace": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-1.0.4.tgz", - "integrity": "sha512-I5GTdSfhYfAPNztx2xJRQpG8cuDSNt599/7YUn7Gx/WxNMsG+a835k97TDkFgk123cwjfwINaZknkKkphx/f2A==", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/hastscript": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-6.0.0.tgz", @@ -12786,18 +12775,6 @@ "value-equal": "^1.0.1" } }, - "node_modules/hogan.js": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/hogan.js/-/hogan.js-3.0.2.tgz", - "integrity": "sha512-RqGs4wavGYJWE07t35JQccByczmNUXQT0E12ZYV1VKYu5UiAU9lsos/yBAcf840+zrUQQxgVduCR5/B8nNtibg==", - "dependencies": { - "mkdirp": "0.3.0", - "nopt": "1.0.10" - }, - "bin": { - "hulk": "bin/hulk" - } - }, "node_modules/hoist-non-react-statics": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", @@ -13673,11 +13650,6 @@ "node": ">=0.10.0" } }, - "node_modules/immediate": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.3.0.tgz", - "integrity": "sha512-HR7EVodfFUdQCTIeySw+WDRFJlPcLOJbXfwwZ7Oom6tjsvZ3bOkCDJHehQC3nxJrv7+f9XecwazynjU8e4Vw3Q==" - }, "node_modules/immer": { "version": "9.0.21", "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.21.tgz", @@ -14798,6 +14770,34 @@ "resolved": "https://registry.npmjs.org/listenercount/-/listenercount-1.0.1.tgz", "integrity": "sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ==" }, + "node_modules/lit": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/lit/-/lit-3.1.4.tgz", + "integrity": "sha512-q6qKnKXHy2g1kjBaNfcoLlgbI3+aSOZ9Q4tiGa9bGYXq5RBXxkVTqTIVmP2VWMp29L4GyvCFm8ZQ2o56eUAMyA==", + "dependencies": { + "@lit/reactive-element": "^2.0.4", + "lit-element": "^4.0.4", + "lit-html": "^3.1.2" + } + }, + "node_modules/lit-element": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/lit-element/-/lit-element-4.0.6.tgz", + "integrity": "sha512-U4sdJ3CSQip7sLGZ/uJskO5hGiqtlpxndsLr6mt3IQIjheg93UKYeGQjWMRql1s/cXNOaRrCzC2FQwjIwSUqkg==", + "dependencies": { + "@lit-labs/ssr-dom-shim": "^1.2.0", + "@lit/reactive-element": "^2.0.4", + "lit-html": "^3.1.2" + } + }, + "node_modules/lit-html": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/lit-html/-/lit-html-3.1.4.tgz", + "integrity": "sha512-yKKO2uVv7zYFHlWMfZmqc+4hkmSbFp8jgjdZY9vvR9jr4J8fH6FUMXhr+ljfELgmjpvlF7Z1SJ5n5/Jeqtc9YA==", + "dependencies": { + "@types/trusted-types": "^2.0.2" + } + }, "node_modules/livereload-js": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/livereload-js/-/livereload-js-2.4.0.tgz", @@ -15106,16 +15106,6 @@ "yallist": "^3.0.2" } }, - "node_modules/lunr": { - "version": "2.3.9", - "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", - "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==" - }, - "node_modules/lunr-languages": { - "version": "1.13.0", - "resolved": "https://registry.npmjs.org/lunr-languages/-/lunr-languages-1.13.0.tgz", - "integrity": "sha512-qgTOarcnAtVFKr0aJ2GuiqbBdhKF61jpF8OgFbnlSAb1t6kOiQW67q0hv0UQzzB+5+OwPpnZyFT/L0L9SQG1/A==" - }, "node_modules/make-dir": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", @@ -15265,6 +15255,17 @@ "node": ">= 0.10.0" } }, + 
"node_modules/marked": { + "version": "13.0.3", + "resolved": "https://registry.npmjs.org/marked/-/marked-13.0.3.tgz", + "integrity": "sha512-rqRix3/TWzE9rIoFGIn8JmsVfhiuC8VIQ8IdX5TfzmeBucdY05/0UlzKaw0eVtpcN/OdVFpBk7CjKGo9iHJ/zA==", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/math-random": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/math-random/-/math-random-1.0.4.tgz", @@ -15403,11 +15404,11 @@ "integrity": "sha512-jo1OfR4TaEwd5HOrt5+tAZ9mqT4jmpNAusXtyfNzqVm9uiSYFZlKM1wYL4oU7azZW/PxQW53wM0S6OR1JHNa2g==" }, "node_modules/micromatch": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", - "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", + "integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", "dependencies": { - "braces": "^3.0.2", + "braces": "^3.0.3", "picomatch": "^2.3.1" }, "engines": { @@ -15525,15 +15526,6 @@ "node": ">=0.10.0" } }, - "node_modules/mkdirp": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.3.0.tgz", - "integrity": "sha512-OHsdUcVAQ6pOtg5JYWpCBo9W/GySVuwvP9hueRMW7UqshC0tbfzLv8wjySTPm3tfUZ/21CE9E1pJagOA91Pxew==", - "deprecated": "Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)", - "engines": { - "node": "*" - } - }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", @@ -15744,20 +15736,6 @@ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", "integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==" }, - "node_modules/nopt": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/nopt/-/nopt-1.0.10.tgz", - "integrity": "sha512-NWmpvLSqUrgrAC9HCuxEvb+PSloHpqVu+FqcO4eeF2h5qYRhA7ev6KvelyQAKtegUbC6RypJnlEOhd8vloNKYg==", - "dependencies": { - "abbrev": "1" - }, - "bin": { - "nopt": "bin/nopt.js" - }, - "engines": { - "node": "*" - } - }, "node_modules/normalize-package-data": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", @@ -15804,11 +15782,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/not": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/not/-/not-0.1.0.tgz", - "integrity": "sha512-5PDmaAsVfnWUgTUbJ3ERwn7u79Z0dYxN9ErxCpVJJqe2RK0PJ3z+iFUxuqjwtlDDegXvtWoxD/3Fzxox7tFGWA==" - }, "node_modules/npm-conf": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/npm-conf/-/npm-conf-1.1.3.tgz", @@ -16314,6 +16287,21 @@ "semver": "bin/semver.js" } }, + "node_modules/pagefind": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/pagefind/-/pagefind-1.1.0.tgz", + "integrity": "sha512-1nmj0/vfYcMxNEQj0YDRp6bTVv9hI7HLdPhK/vBBYlrnwjATndQvHyicj5Y7pUHrpCFZpFnLVQXIF829tpFmaw==", + "bin": { + "pagefind": "lib/runner/bin.cjs" + }, + "optionalDependencies": { + "@pagefind/darwin-arm64": "1.1.0", + "@pagefind/darwin-x64": "1.1.0", + "@pagefind/linux-arm64": "1.1.0", + "@pagefind/linux-x64": "1.1.0", + "@pagefind/windows-x64": "1.1.0" + } + }, "node_modules/param-case": { "version": "3.0.4", "resolved": 
"https://registry.npmjs.org/param-case/-/param-case-3.0.4.tgz", @@ -18162,24 +18150,6 @@ "jsesc": "bin/jsesc" } }, - "node_modules/rehype-parse": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/rehype-parse/-/rehype-parse-7.0.1.tgz", - "integrity": "sha512-fOiR9a9xH+Le19i4fGzIEowAbwG7idy2Jzs4mOrFWBSJ0sNUgy0ev871dwWnbOo371SjgjG4pwzrbgSVrKxecw==", - "dependencies": { - "hast-util-from-parse5": "^6.0.0", - "parse5": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/rehype-parse/node_modules/parse5": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" - }, "node_modules/relateurl": { "version": "0.2.7", "resolved": "https://registry.npmjs.org/relateurl/-/relateurl-0.2.7.tgz", @@ -20775,19 +20745,6 @@ "node": ">=0.10.0" } }, - "node_modules/to-vfile": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/to-vfile/-/to-vfile-6.1.0.tgz", - "integrity": "sha512-BxX8EkCxOAZe+D/ToHdDsJcVI4HqQfmw0tCkp31zf3dNP/XWIAjU4CmeuSwsSoOzOTqHPOL0KUzyZqJplkD0Qw==", - "dependencies": { - "is-buffer": "^2.0.0", - "vfile": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -21323,18 +21280,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/unist-util-find-after": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-3.0.0.tgz", - "integrity": "sha512-ojlBqfsBftYXExNu3+hHLfJQ/X1jYY/9vdm4yZWjIbf0VuWF6CRufci1ZyoD/wV2TYMKxXUoNuoqwy+CkgzAiQ==", - "dependencies": { - "unist-util-is": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/unist-util-generated": { "version": "1.1.6", "resolved": "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-1.1.6.tgz", @@ -22388,32 +22333,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/wide-align": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.5.tgz", - "integrity": "sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==", - "dependencies": { - "string-width": "^1.0.2 || 2 || 3 || 4" - } - }, - "node_modules/wide-align/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "node_modules/wide-align/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/widest-line": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/widest-line/-/widest-line-4.0.1.tgz", diff --git a/docs/my-website/yarn.lock b/docs/my-website/yarn.lock index 650e7808e..92809c0f7 100644 --- a/docs/my-website/yarn.lock +++ 
b/docs/my-website/yarn.lock @@ -1722,7 +1722,7 @@ "@docusaurus/theme-search-algolia" "2.4.1" "@docusaurus/types" "2.4.1" -"@docusaurus/react-loadable@5.5.2", "react-loadable@npm:@docusaurus/react-loadable@5.5.2": +"@docusaurus/react-loadable@5.5.2": version "5.5.2" resolved "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz" integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== @@ -5752,9 +5752,9 @@ fast-url-parser@1.1.3: punycode "^1.3.2" fast-xml-parser@^4.1.3: - version "4.3.2" - resolved "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.3.2.tgz" - integrity sha512-rmrXUXwbJedoXkStenj1kkljNF7ugn5ZjR9FJcwmCfcCbtOMDghPajbc+Tck6vE6F5XsDmx+Pr2le9fw8+pXBg== + version "4.4.1" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz#86dbf3f18edf8739326447bcaac31b4ae7f6514f" + integrity sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw== dependencies: strnum "^1.0.5" @@ -10329,6 +10329,14 @@ react-loadable-ssr-addon-v5-slorber@^1.0.1: dependencies: "@babel/runtime" "^7.10.3" +"react-loadable@npm:@docusaurus/react-loadable@5.5.2": + version "5.5.2" + resolved "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz" + integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== + dependencies: + "@types/react" "*" + prop-types "^15.6.2" + react-router-config@^5.1.1: version "5.1.1" resolved "https://registry.npmjs.org/react-router-config/-/react-router-config-5.1.1.tgz" From 8e24c3c945502b42216e366762ef522a92d637b7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 15:42:57 -0700 Subject: [PATCH 158/655] build(model_prices_and_context_window.json): update model info for llama3.1 on bedrock - supports tool calling, not tool choice --- litellm/model_prices_and_context_window_backup.json | 12 +++++++++--- model_prices_and_context_window.json | 12 +++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d20e5681c..8a07eca34 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3773,7 +3773,9 @@ "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000006, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "meta.llama3-1-70b-instruct-v1:0": { "max_tokens": 128000, @@ -3782,7 +3784,9 @@ "input_cost_per_token": 0.00000265, "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "meta.llama3-1-405b-instruct-v1:0": { "max_tokens": 128000, @@ -3791,7 +3795,9 @@ "input_cost_per_token": 0.00000532, "output_cost_per_token": 0.000016, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "max_tokens": 77, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index d20e5681c..8a07eca34 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3773,7 +3773,9 @@ "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000006, 
"litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "meta.llama3-1-70b-instruct-v1:0": { "max_tokens": 128000, @@ -3782,7 +3784,9 @@ "input_cost_per_token": 0.00000265, "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "meta.llama3-1-405b-instruct-v1:0": { "max_tokens": 128000, @@ -3791,7 +3795,9 @@ "input_cost_per_token": 0.00000532, "output_cost_per_token": 0.000016, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "max_tokens": 77, From 7f0daafc564003a65592c7254e7127aebcc7bdf9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 15:50:12 -0700 Subject: [PATCH 159/655] docs(main.py): update acompletion_with_retries docstring Closes https://github.com/BerriAI/litellm/issues/4908 --- litellm/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/main.py b/litellm/main.py index 20fa0345e..3a52ae29b 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2791,6 +2791,7 @@ def completion_with_retries(*args, **kwargs): async def acompletion_with_retries(*args, **kwargs): """ + [DEPRECATED]. Use 'acompletion' or router.acompletion instead! Executes a litellm.completion() with 3 retries """ try: From 3d88d7f750e74ff230a0e538ac1725250c0989e4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 15:51:13 -0700 Subject: [PATCH 160/655] ui - allow entering custom model names --- .../src/components/model_dashboard.tsx | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx index 27608f0ae..bf93e3416 100644 --- a/ui/litellm-dashboard/src/components/model_dashboard.tsx +++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx @@ -220,6 +220,9 @@ const handleSubmit = async ( else if (key === "base_model") { // Add key-value pair to model_info dictionary modelInfoObj[key] = value; + } + else if (key === "custom_model_name") { + litellmParamsObj["model"] = value; } else if (key == "litellm_extra_params") { console.log("litellm_extra_params:", value); let litellmExtraParams = {}; @@ -1718,26 +1721,43 @@ const ModelDashboard: React.FC = ({ + - { (selectedProvider === Providers.Azure) || (selectedProvider === Providers.OpenAI_Compatible) || (selectedProvider === Providers.Ollama) ? ( - - ) : providerModels.length > 0 ? 
( - - {providerModels.map((model, index) => ( - - {model} - - ))} - - ) : ( - - )} + + Custom Model Name (Enter below) + {providerModels.map((model, index) => ( + + {model} + + ))} + + + prevValues.model !== currentValues.model} + > + {({ getFieldValue }) => { + const selectedModels = getFieldValue('model') || []; + return selectedModels.includes('custom') && ( + + + + ) + }} + + From d71dc1acaf57108d4b2a5a45b8713b5d6fcbb1c0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 16:58:56 -0700 Subject: [PATCH 161/655] ui use setGlobalLitellmHeaderName --- ui/litellm-dashboard/src/app/page.tsx | 6 + .../src/components/networking.tsx | 180 ++++++++---------- 2 files changed, 89 insertions(+), 97 deletions(-) diff --git a/ui/litellm-dashboard/src/app/page.tsx b/ui/litellm-dashboard/src/app/page.tsx index 26bdb7af2..9b7a09cbf 100644 --- a/ui/litellm-dashboard/src/app/page.tsx +++ b/ui/litellm-dashboard/src/app/page.tsx @@ -18,6 +18,7 @@ import Usage from "../components/usage"; import CacheDashboard from "@/components/cache_dashboard"; import { jwtDecode } from "jwt-decode"; import { Typography } from "antd"; +import { setGlobalLitellmHeaderName } from "../components/networking" function getCookie(name: string) { console.log("COOKIES", document.cookie) @@ -123,6 +124,11 @@ const CreateKeyPage = () => { if (decoded.premium_user) { setPremiumUser(decoded.premium_user); } + + if (decoded.auth_header_name) { + setGlobalLitellmHeaderName(decoded.auth_header_name); + } + } } }, [token]); diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx index 8527b39a2..b196d3c2e 100644 --- a/ui/litellm-dashboard/src/components/networking.tsx +++ b/ui/litellm-dashboard/src/components/networking.tsx @@ -37,30 +37,15 @@ const handleError = async (errorData: string) => { }; -export const getLiteLLMHeaderName = async () => { - try { - const url = proxyBaseUrl ? `${proxyBaseUrl}/litellm_header_name` : '/litellm_header_name'; - const response = await fetch(url, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }); +// Global variable for the header name +let globalLitellmHeaderName: string | null = null; - if (!response.ok) { - throw new Error(`HTTP error! 
status: ${response.status}`); - } +// Function to set the global header name +export function setGlobalLitellmHeaderName(headerName: string | null) { + console.log(`setGlobalLitellmHeaderName: ${headerName}`); + globalLitellmHeaderName = headerName; +} - const data = await response.json(); - console.log('Received LiteLLM header name:', data); - return data.litellm_key_header_name; - } catch (error) { - console.error('Failed to get LiteLLM header name:', error); - throw error; - } -}; - -const litellm_key_header_name = getLiteLLMHeaderName(); export const modelCostMap = async ( accessToken: string, @@ -71,7 +56,7 @@ export const modelCostMap = async ( url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, } @@ -94,7 +79,7 @@ export const modelCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -133,7 +118,7 @@ export const modelSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -164,7 +149,7 @@ export const modelDeleteCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -206,7 +191,7 @@ export const budgetDeleteCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -241,7 +226,7 @@ export const budgetCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -277,7 +262,7 @@ export const invitationCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -316,7 +301,7 @@ export const invitationClaimCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -354,7 +339,7 @@ export const alertingSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -411,7 +396,7 @@ export const keyCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -473,7 +458,7 @@ export const userCreateCall = async ( const response = await 
fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -507,7 +492,7 @@ export const keyDeleteCall = async (accessToken: String, user_key: String) => { const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -539,7 +524,7 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => { const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -592,7 +577,7 @@ export const userInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -627,7 +612,7 @@ export const teamInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -659,7 +644,7 @@ export const getTotalSpendCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -724,7 +709,7 @@ export const claimOnboardingToken = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -766,7 +751,7 @@ export const modelInfoCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -814,7 +799,7 @@ export const modelHubCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -845,7 +830,7 @@ export const getAllowedIPs = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -874,7 +859,7 @@ export const addAllowedIP = async (accessToken: String, ip: String) => { const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ ip: ip }), @@ -904,7 +889,7 @@ export const deleteAllowedIP = async (accessToken: String, ip: String) => { const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ ip: ip }), @@ -946,7 +931,7 @@ export const 
modelMetricsCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -985,7 +970,7 @@ export const streamingModelMetricsCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1030,7 +1015,7 @@ export const modelMetricsSlowResponsesCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1074,7 +1059,7 @@ export const modelExceptionsCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1102,6 +1087,7 @@ export const modelAvailableCall = async ( /** * Get all the models user has access to */ + console.log("in /models calls, globalLitellmHeaderName", globalLitellmHeaderName) try { let url = proxyBaseUrl ? `${proxyBaseUrl}/models` : `/models`; @@ -1109,7 +1095,7 @@ export const modelAvailableCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1139,7 +1125,7 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => { const response = await fetch(`${url}?api_key=${token}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1167,7 +1153,7 @@ export const teamSpendLogsCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1210,7 +1196,7 @@ export const tagsSpendLogsCall = async ( const response = await fetch(`${url}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1238,7 +1224,7 @@ export const allTagNamesCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1266,7 +1252,7 @@ export const allEndUsersCall = async (accessToken: String) => { const response = await fetch(`${url}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1304,7 +1290,7 @@ export const userSpendLogsCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1334,7 +1320,7 @@ export const adminSpendLogsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", 
headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1364,7 +1350,7 @@ export const adminTopKeysCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1412,14 +1398,14 @@ export const adminTopEndUsersCall = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; "Content-Type": string; }; body?: string; // The body is optional and might not be present } = { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }; @@ -1465,12 +1451,12 @@ export const adminspendByProvider = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1508,12 +1494,12 @@ export const adminGlobalActivity = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1549,12 +1535,12 @@ export const adminGlobalCacheActivity = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1590,12 +1576,12 @@ export const adminGlobalActivityPerModel = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1636,12 +1622,12 @@ export const adminGlobalActivityExceptions = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1682,12 +1668,12 @@ export const adminGlobalActivityExceptionsPerDeployment = async ( const requestOptions: { method: string; headers: { - litellm_key_header_name: string; + [globalLitellmHeaderName]: string; }; } = { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, }, }; @@ -1716,7 +1702,7 @@ export const adminTopModelsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1743,7 +1729,7 @@ export const keyInfoCall = async (accessToken: String, keys: String[]) => { const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer 
${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1773,7 +1759,7 @@ export const spendUsersCall = async (accessToken: String, userID: String) => { const response = await fetch(`${url}?user_id=${userID}`, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1805,7 +1791,7 @@ export const userRequestModelCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1840,7 +1826,7 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1880,7 +1866,7 @@ export const userGetAllUsersCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1909,7 +1895,7 @@ export const getPossibleUserRoles = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -1938,7 +1924,7 @@ export const teamCreateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -1974,7 +1960,7 @@ export const keyUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2009,7 +1995,7 @@ export const teamUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2044,7 +2030,7 @@ export const modelUpdateCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2088,7 +2074,7 @@ export const teamMemberAddCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2131,7 +2117,7 @@ export const userUpdateUserCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: response_body, @@ -2169,7 +2155,7 @@ export const PredictedSpendLogsCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + 
[globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2205,7 +2191,7 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2244,7 +2230,7 @@ export const serviceHealthCheck = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2279,7 +2265,7 @@ export const getBudgetList = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2312,7 +2298,7 @@ export const getBudgetSettings = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2350,7 +2336,7 @@ export const getCallbacksCall = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2381,7 +2367,7 @@ export const getGeneralSettingsCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2415,7 +2401,7 @@ export const getConfigFieldSetting = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2453,7 +2439,7 @@ export const updateConfigFieldSetting = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify(formData), @@ -2493,7 +2479,7 @@ export const deleteConfigFieldSetting = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify(formData), @@ -2528,7 +2514,7 @@ export const setCallbacksCall = async ( const response = await fetch(url, { method: "POST", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -2563,7 +2549,7 @@ export const healthCheckCall = async (accessToken: String) => { const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + [globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); @@ -2599,7 +2585,7 @@ export const getProxyBaseUrlAndLogoutUrl = async ( const response = await fetch(url, { method: "GET", headers: { - litellm_key_header_name: `Bearer ${accessToken}`, + 
[globalLitellmHeaderName]: `Bearer ${accessToken}`, "Content-Type": "application/json", }, }); From f25ed92ee29ab7e6b6ace05f5a015c294d3e9af5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 16:59:15 -0700 Subject: [PATCH 162/655] better debugging for custom headers --- litellm/proxy/auth/user_api_key_auth.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index d91baf5ca..c4401a2cf 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -1310,8 +1310,9 @@ def get_api_key_from_custom_header( # use this as the virtual key passed to litellm proxy custom_litellm_key_header_name = custom_litellm_key_header_name.lower() verbose_proxy_logger.debug( - "searching for custom_litellm_key_header_name= %s", + "searching for custom_litellm_key_header_name= %s, in headers=%s", custom_litellm_key_header_name, + request.headers, ) custom_api_key = request.headers.get(custom_litellm_key_header_name) if custom_api_key: From 0c25aaf9dfcd47e512035afc2584aa88a1802c7d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 17:03:04 -0700 Subject: [PATCH 163/655] check litellm header in login on ui --- .../key_management_endpoints.py | 19 ------------------- litellm/proxy/proxy_server.py | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 824c40de1..0e4696e44 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -914,22 +914,3 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None) verbose_proxy_logger.debug(traceback.format_exc()) raise e return deleted_tokens - - -@router.get( - "/litellm_header_name", - include_in_schema=False, -) -async def get_litellm_header_name(): - """ - Used by LiteLLM Admin UI - - returns the header name that should be used for the Authorization header on requests to litellm - """ - from litellm.proxy.proxy_server import general_settings - - if "litellm_key_header_name" in general_settings: - return {"litellm_key_header_name": general_settings["litellm_key_header_name"]} - else: - # default value - return {"litellm_key_header_name": "Authorization"} diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 1f35a06f0..41078130b 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -7773,7 +7773,7 @@ async def fallback_login(request: Request): "/login", include_in_schema=False ) # hidden since this is a helper for UI sso login async def login(request: Request): - global premium_user + global premium_user, general_settings try: import multipart except ImportError: @@ -7875,6 +7875,9 @@ async def login(request: Request): "user_role": user_role, # this is the path without sso - we can assume only admins will use this "login_method": "username_password", "premium_user": premium_user, + "auth_header_name": general_settings.get( + "litellm_key_header_name", "Authorization" + ), }, master_key, algorithm="HS256", @@ -7939,6 +7942,9 @@ async def login(request: Request): "user_role": user_role, "login_method": "username_password", "premium_user": premium_user, + "auth_header_name": general_settings.get( + "litellm_key_header_name", "Authorization" + ), }, master_key, algorithm="HS256", @@ 
-7987,7 +7993,7 @@ async def onboarding(invite_link: str): - Get user from db - Pass in user_email if set """ - global prisma_client, master_key + global prisma_client, master_key, general_settings if master_key is None: raise ProxyException( message="Master Key not set for Proxy. Please set Master Key to use Admin UI. Set `LITELLM_MASTER_KEY` in .env or set general_settings:master_key in config.yaml. https://docs.litellm.ai/docs/proxy/virtual_keys. If set, use `--detailed_debug` to debug issue.", @@ -8074,6 +8080,9 @@ async def onboarding(invite_link: str): "user_role": user_obj.user_role, "login_method": "username_password", "premium_user": premium_user, + "auth_header_name": general_settings.get( + "litellm_key_header_name", "Authorization" + ), }, master_key, algorithm="HS256", @@ -8491,6 +8500,9 @@ async def auth_callback(request: Request): "user_role": user_role, "login_method": "sso", "premium_user": premium_user, + "auth_header_name": general_settings.get( + "litellm_key_header_name", "Authorization" + ), }, master_key, algorithm="HS256", From c3ac6b098ee73a17baa63f3c6775d1c53e5d49ca Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 17:19:28 -0700 Subject: [PATCH 164/655] fix linting error - cohere_chat --- litellm/llms/cohere_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/cohere_chat.py b/litellm/llms/cohere_chat.py index 1b4e0d798..9321d6f8b 100644 --- a/litellm/llms/cohere_chat.py +++ b/litellm/llms/cohere_chat.py @@ -235,7 +235,7 @@ def completion( optional_params["message"] = most_recent_message data = { "model": model, - "chat_history": chat_histrory, + "chat_history": chat_history, **optional_params, } From 528741a809043851461b64cdd27b5f813dd04e0c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 17:47:33 -0700 Subject: [PATCH 165/655] fix(factory.py): handle special keys for mistral chat template --- litellm/llms/prompt_templates/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 2218fa568..99b3f97e1 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -235,7 +235,7 @@ def mistral_api_pt(messages): """ new_messages = [] for m in messages: - special_keys = ["role", "content", "tool_calls"] + special_keys = ["role", "content", "tool_calls", "function_call"] extra_args = {} if isinstance(m, dict): for k, v in m.items(): From 130815bb933bd9ce11c1e9ca8224fc9acc256eed Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 18:42:42 -0700 Subject: [PATCH 166/655] add litellm.create_fine_tuning_job --- litellm/fine_tuning/main.py | 182 ++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 litellm/fine_tuning/main.py diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py new file mode 100644 index 000000000..6fa2bd9fb --- /dev/null +++ b/litellm/fine_tuning/main.py @@ -0,0 +1,182 @@ +""" +Main File for Fine Tuning API implementation + +https://platform.openai.com/docs/api-reference/fine-tuning + +- fine_tuning.jobs.create() +- fine_tuning.jobs.list() +- client.fine_tuning.jobs.list_events() +""" + +import asyncio +import contextvars +import os +from functools import partial +from typing import Any, Coroutine, Dict, Literal, Optional, Union + +import httpx + +import litellm +from litellm.llms.openai_fine_tuning.openai import ( + FineTuningJob, + FineTuningJobCreate, + 
OpenAIFineTuningAPI, +) +from litellm.types.llms.openai import Hyperparameters +from litellm.types.router import * +from litellm.utils import supports_httpx_timeout + +####### ENVIRONMENT VARIABLES ################### +openai_fine_tuning_instance = OpenAIFineTuningAPI() +################################################# + + +async def acreate_fine_tuning_job( + model: str, + training_file: str, + hyperparameters: Optional[Hyperparameters] = None, + suffix: Optional[str] = None, + validation_file: Optional[str] = None, + integrations: Optional[List[str]] = None, + seed: Optional[int] = None, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> FineTuningJob: + """ + Async: Creates and executes a batch from an uploaded file of request + + LiteLLM Equivalent of POST: https://api.openai.com/v1/batches + """ + try: + loop = asyncio.get_event_loop() + kwargs["acreate_fine_tuning_job"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + create_fine_tuning_job, + model, + training_file, + hyperparameters, + suffix, + validation_file, + integrations, + seed, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + return response + except Exception as e: + raise e + + +def create_fine_tuning_job( + model: str, + training_file: str, + hyperparameters: Optional[Hyperparameters] = None, + suffix: Optional[str] = None, + validation_file: Optional[str] = None, + integrations: Optional[List[str]] = None, + seed: Optional[int] = None, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Union[FineTuningJob, Coroutine[Any, Any, FineTuningJob]]: + """ + Creates a fine-tuning job which begins the process of creating a new model from a given dataset. 
+ + Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete + + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("acreate_fine_tuning_job", False) is True + + create_fine_tuning_job_data = FineTuningJobCreate( + model=model, + training_file=training_file, + hyperparameters=hyperparameters, + suffix=suffix, + validation_file=validation_file, + integrations=integrations, + seed=seed, + ) + + response = openai_fine_tuning_instance.create_fine_tuning_job( + api_base=api_base, + api_key=api_key, + organization=organization, + create_fine_tuning_job_data=create_fine_tuning_job_data, + timeout=timeout, + max_retries=optional_params.max_retries, + _is_async=_is_async, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e From 59fc3ba649acc6178ea4a078fc250bd47ce6d086 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 18:57:29 -0700 Subject: [PATCH 167/655] add create_fine_tuning --- litellm/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/__init__.py b/litellm/__init__.py index 97a0a05ea..72aeb74d9 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -906,6 +906,7 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * +from .fine_tuning.main import * from .files.main import * from .scheduler import * from .cost_calculator import response_cost_calculator, cost_per_token From 1202e1c645c1c6d6db33ed618593bd948cfc0cf1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 18:59:29 -0700 Subject: [PATCH 168/655] add acreate_fine_tuning_job --- litellm/fine_tuning/main.py | 4 +- litellm/llms/openai_fine_tuning/openai.py | 96 +++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 litellm/llms/openai_fine_tuning/openai.py diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index 6fa2bd9fb..de899fe4b 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -34,7 +34,7 @@ openai_fine_tuning_instance = OpenAIFineTuningAPI() async def acreate_fine_tuning_job( model: str, training_file: str, - hyperparameters: Optional[Hyperparameters] = None, + hyperparameters: Optional[Hyperparameters] = {}, suffix: Optional[str] = None, validation_file: Optional[str] = None, integrations: Optional[List[str]] = None, @@ -85,7 +85,7 @@ async def acreate_fine_tuning_job( def create_fine_tuning_job( model: str, training_file: str, - hyperparameters: Optional[Hyperparameters] = None, + hyperparameters: Optional[Hyperparameters] = {}, suffix: Optional[str] = None, validation_file: Optional[str] = None, integrations: Optional[List[str]] = None, diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py new file mode 100644 index 000000000..c96410832 --- /dev/null +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -0,0 +1,96 @@ +from typing import Any, Coroutine, Optional, Union + +import httpx +from openai import AsyncOpenAI, OpenAI +from openai.types.fine_tuning import FineTuningJob + +from litellm._logging import verbose_logger +from litellm.llms.base import BaseLLM +from litellm.types.llms.openai import FineTuningJobCreate + + +class OpenAIFineTuningAPI(BaseLLM): + """ + OpenAI methods to support for batches + """ + + def __init__(self) -> None: + super().__init__() + + def get_openai_client( + self, + api_key: Optional[str], + api_base: Optional[str], + timeout: Union[float, httpx.Timeout], + max_retries: Optional[int], + organization: Optional[str], + client: Optional[Union[OpenAI, AsyncOpenAI]] = None, + _is_async: bool = False, + ) -> Optional[Union[OpenAI, AsyncOpenAI]]: + received_args = locals() + openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None + if client is None: + data = {} + for k, v in received_args.items(): + if k == "self" or k == "client" or k == "_is_async": + pass + elif k == "api_base" and v is not None: + 
data["base_url"] = v + elif v is not None: + data[k] = v + if _is_async is True: + openai_client = AsyncOpenAI(**data) + else: + openai_client = OpenAI(**data) # type: ignore + else: + openai_client = client + + return openai_client + + async def acreate_fine_tuning_job( + self, + create_fine_tuning_job_data: FineTuningJobCreate, + openai_client: AsyncOpenAI, + ) -> FineTuningJob: + response = await openai_client.batches.create(**create_fine_tuning_job_data) + return response + + def create_fine_tuning_job( + self, + _is_async: bool, + create_fine_tuning_job_data: FineTuningJobCreate, + api_key: Optional[str], + api_base: Optional[str], + timeout: Union[float, httpx.Timeout], + max_retries: Optional[int], + organization: Optional[str], + client: Optional[Union[OpenAI, AsyncOpenAI]] = None, + ) -> Union[Coroutine[Any, Any, FineTuningJob]]: + openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client( + api_key=api_key, + api_base=api_base, + timeout=timeout, + max_retries=max_retries, + organization=organization, + client=client, + _is_async=_is_async, + ) + if openai_client is None: + raise ValueError( + "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment." + ) + + if _is_async is True: + if not isinstance(openai_client, AsyncOpenAI): + raise ValueError( + "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client." + ) + return self.acreate_fine_tuning_job( # type: ignore + create_fine_tuning_job_data=create_fine_tuning_job_data, + openai_client=openai_client, + ) + verbose_logger.debug( + "creating fine tuning job, args= %s", create_fine_tuning_job_data + ) + response = openai_client.fine_tuning.jobs.create(**create_fine_tuning_job_data) + return response From 5123bf4e75c96d7e6f37ea51958cf68d7864df71 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 18:59:44 -0700 Subject: [PATCH 169/655] add types for FineTuningJobCreate OpenAI --- litellm/types/llms/openai.py | 78 +++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 35e442119..fcff8b4ba 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -30,7 +30,7 @@ from openai.types.beta.thread_create_params import ( from openai.types.beta.threads.message import Message as OpenAIMessage from openai.types.beta.threads.message_content import MessageContent from openai.types.beta.threads.run import Run -from pydantic import BaseModel +from pydantic import BaseModel, Field from typing_extensions import Dict, Required, override FileContent = Union[IO[bytes], bytes, PathLike] @@ -455,3 +455,79 @@ class ChatCompletionUsageBlock(TypedDict): prompt_tokens: int completion_tokens: int total_tokens: int + + +class Hyperparameters(TypedDict): + batch_size: Optional[Union[str, int]] = Field( + default="auto", description="Number of examples in each batch." + ) + learning_rate_multiplier: Optional[Union[str, float]] = Field( + default="auto", description="Scaling factor for the learning rate." + ) + n_epochs: Optional[Union[str, int]] = Field( + default="auto", description="The number of epochs to train the model for." 
+ ) + + +class FineTuningJobCreate(TypedDict): + """ + FineTuningJobCreate - Create a fine-tuning job + + Example Request + ``` + { + "model": "gpt-3.5-turbo", + "training_file": "file-abc123", + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": 0.1, + "n_epochs": 3 + }, + "suffix": "custom-model-name", + "validation_file": "file-xyz789", + "integrations": ["slack"], + "seed": 42 + } + ``` + """ + + model: str = Field(..., description="The name of the model to fine-tune.") + training_file: str = Field( + ..., description="The ID of an uploaded file that contains training data." + ) + hyperparameters: Optional[Hyperparameters] = Field( + default={}, description="The hyperparameters used for the fine-tuning job." + ) + suffix: Optional[str] = Field( + default=None, + description="A string of up to 18 characters that will be added to your fine-tuned model name.", + ) + validation_file: Optional[str] = Field( + default=None, + description="The ID of an uploaded file that contains validation data.", + ) + integrations: Optional[List[str]] = Field( + default=None, + description="A list of integrations to enable for your fine-tuning job.", + ) + seed: Optional[int] = Field( + default=None, description="The seed controls the reproducibility of the job." + ) + + class Config: + allow_population_by_field_name = True + schema_extra = { + "example": { + "model": "gpt-3.5-turbo", + "training_file": "file-abc123", + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": 0.1, + "n_epochs": 3, + }, + "suffix": "custom-model-name", + "validation_file": "file-xyz789", + "integrations": ["slack"], + "seed": 42, + } + } From 3e3f9e3f0cbe746a13fef002adab1cb380c78f12 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 18:59:55 -0700 Subject: [PATCH 170/655] add test_create_fine_tune_job --- litellm/tests/test_fine_tuning_api.py | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 litellm/tests/test_fine_tuning_api.py diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py new file mode 100644 index 000000000..eec81d007 --- /dev/null +++ b/litellm/tests/test_fine_tuning_api.py @@ -0,0 +1,47 @@ +import os +import sys +import traceback + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +from openai import APITimeoutError as Timeout + +import litellm + +litellm.num_retries = 0 +import logging + +from litellm import create_fine_tuning_job +from litellm._logging import verbose_logger + + +def test_create_fine_tune_job(): + verbose_logger.setLevel(logging.DEBUG) + file_name = "openai_batch_completions.jsonl" + _current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(_current_dir, file_name) + + file_obj = litellm.create_file( + file=open(file_path, "rb"), + purpose="fine-tune", + custom_llm_provider="openai", + ) + print("Response from creating file=", file_obj) + + response = litellm.create_fine_tuning_job( + model="gpt-3.5-turbo", + training_file=file_obj.id, + ) + + print("response from litellm.create_fine_tuning_job=", response) + + assert response.id is not None + assert response.model == "gpt-3.5-turbo" + + # delete file + + # cancel ft job + pass From 3802eaa6b558e28c788c435811a1588e64115fce Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:22:23 -0700 Subject: [PATCH 171/655] feat - add cancel_fine_tuning_job --- litellm/fine_tuning/main.py | 84 +++++++++++++++++++++++ 
litellm/llms/openai_fine_tuning/openai.py | 50 ++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index de899fe4b..8784fdd9b 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -180,3 +180,87 @@ def create_fine_tuning_job( return response except Exception as e: raise e + + +def cancel_fine_tuning_job( + fine_tuning_job_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Union[FineTuningJob, Coroutine[Any, Any, FineTuningJob]]: + """ + Creates a fine-tuning job which begins the process of creating a new model from a given dataset. + + Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete + + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("acreate_fine_tuning_job", False) is True + + response = openai_fine_tuning_instance.cancel_fine_tuning_job( + api_base=api_base, + api_key=api_key, + organization=organization, + fine_tuning_job_id=fine_tuning_job_id, + timeout=timeout, + max_retries=optional_params.max_retries, + _is_async=_is_async, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py index c96410832..91924edab 100644 --- a/litellm/llms/openai_fine_tuning/openai.py +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -94,3 +94,53 @@ class OpenAIFineTuningAPI(BaseLLM): ) response = openai_client.fine_tuning.jobs.create(**create_fine_tuning_job_data) return response + + async def acancel_fine_tuning_job( + self, + fine_tuning_job_id: str, + openai_client: AsyncOpenAI, + ) -> FineTuningJob: + response = await openai_client.fine_tuning.jobs.cancel( + fine_tuning_job_id=fine_tuning_job_id + ) + return response + + def cancel_fine_tuning_job( + self, + _is_async: bool, + fine_tuning_job_id: str, + api_key: Optional[str], + api_base: Optional[str], + timeout: Union[float, httpx.Timeout], + max_retries: Optional[int], + organization: Optional[str], + client: Optional[Union[OpenAI, AsyncOpenAI]] = None, + ): + openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client( + api_key=api_key, + api_base=api_base, + timeout=timeout, + max_retries=max_retries, + organization=organization, + client=client, + _is_async=_is_async, + ) + if openai_client is None: + raise ValueError( + "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment." + ) + + if _is_async is True: + if not isinstance(openai_client, AsyncOpenAI): + raise ValueError( + "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client." 
+ ) + return self.acancel_fine_tuning_job( # type: ignore + fine_tuning_job_id=fine_tuning_job_id, + openai_client=openai_client, + ) + verbose_logger.debug("canceling fine tuning job, args= %s", fine_tuning_job_id) + response = openai_client.fine_tuning.jobs.cancel( + fine_tuning_job_id=fine_tuning_job_id + ) + return response From 16d595c4ff0287400b220bab479faccbf3ffbf34 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:22:41 -0700 Subject: [PATCH 172/655] test cancel cancel_fine_tuning_job --- litellm/tests/test_fine_tuning_api.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py index eec81d007..d4935cb0d 100644 --- a/litellm/tests/test_fine_tuning_api.py +++ b/litellm/tests/test_fine_tuning_api.py @@ -31,17 +31,29 @@ def test_create_fine_tune_job(): ) print("Response from creating file=", file_obj) - response = litellm.create_fine_tuning_job( - model="gpt-3.5-turbo", + create_fine_tuning_response = litellm.create_fine_tuning_job( + model="gpt-3.5-turbo-0125", training_file=file_obj.id, ) - print("response from litellm.create_fine_tuning_job=", response) + print("response from litellm.create_fine_tuning_job=", create_fine_tuning_response) - assert response.id is not None - assert response.model == "gpt-3.5-turbo" + assert create_fine_tuning_response.id is not None + assert create_fine_tuning_response.model == "gpt-3.5-turbo-0125" # delete file + litellm.file_delete( + file_id=file_obj.id, + ) + # cancel ft job + response = litellm.cancel_fine_tuning_job( + fine_tuning_job_id=create_fine_tuning_response.id, + ) + + print("response from litellm.cancel_fine_tuning_job=", response) + + assert response.status == "cancelled" + assert response.id == create_fine_tuning_response.id pass From 8e6df89f8aaac34a5da293bcee42c9d84affadeb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:23:35 -0700 Subject: [PATCH 173/655] fix doc string --- litellm/fine_tuning/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index 8784fdd9b..086610684 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -190,7 +190,7 @@ def cancel_fine_tuning_job( **kwargs, ) -> Union[FineTuningJob, Coroutine[Any, Any, FineTuningJob]]: """ - Creates a fine-tuning job which begins the process of creating a new model from a given dataset. + Immediately cancel a fine-tune job. 
Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete From 46772436f1403dfc39aec48b9edee2ba419592d8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:25:36 -0700 Subject: [PATCH 174/655] async cancel ft job --- litellm/fine_tuning/main.py | 40 +++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index 086610684..e5f2a4555 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -47,7 +47,6 @@ async def acreate_fine_tuning_job( """ Async: Creates and executes a batch from an uploaded file of request - LiteLLM Equivalent of POST: https://api.openai.com/v1/batches """ try: loop = asyncio.get_event_loop() @@ -182,6 +181,43 @@ def create_fine_tuning_job( raise e +async def acancel_fine_tuning_job( + fine_tuning_job_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> FineTuningJob: + """ + Async: Immediately cancel a fine-tune job. + """ + try: + loop = asyncio.get_event_loop() + kwargs["acancel_fine_tuning_job"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + cancel_fine_tuning_job, + fine_tuning_job_id, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + return response + except Exception as e: + raise e + + def cancel_fine_tuning_job( fine_tuning_job_id: str, custom_llm_provider: Literal["openai"] = "openai", @@ -237,7 +273,7 @@ def cancel_fine_tuning_job( elif timeout is None: timeout = 600.0 - _is_async = kwargs.pop("acreate_fine_tuning_job", False) is True + _is_async = kwargs.pop("acancel_fine_tuning_job", False) is True response = openai_fine_tuning_instance.cancel_fine_tuning_job( api_base=api_base, From 76f4aa80e3495254f4ae25b8bbb52810e2c15510 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 19:40:12 -0700 Subject: [PATCH 175/655] docs(deploy.md): support running litellm docker container without internet connection --- docs/my-website/docs/proxy/deploy.md | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md index e8bc432b8..35fc0a508 100644 --- a/docs/my-website/docs/proxy/deploy.md +++ b/docs/my-website/docs/proxy/deploy.md @@ -558,6 +558,39 @@ docker run --name litellm-proxy \ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml ``` +## LiteLLM without Internet Connection + +By default `prisma generate` downloads [prisma's engine binaries](https://www.prisma.io/docs/orm/reference/environment-variables-reference#custom-engine-file-locations). This might cause errors when running without internet connection. + +Use this dockerfile to build an image which pre-generates the prisma binaries. 
+ +```Dockerfile +# Use the provided base image +FROM ghcr.io/berriai/litellm:main-latest + +# Set the working directory to /app +WORKDIR /app + +### [👇 KEY STEP] ### +# Install Prisma CLI and generate Prisma client +RUN pip install prisma +RUN prisma generate +### FIN #### + + +# Expose the necessary port +EXPOSE 4000 + +# Override the CMD instruction with your desired command and arguments +# WARNING: FOR PROD DO NOT USE `--detailed_debug` it slows down response times, instead use the following CMD +# CMD ["--port", "4000", "--config", "config.yaml"] + +# Define the command to run your app +ENTRYPOINT ["litellm"] + +CMD ["--port", "4000"] +``` + ## Advanced Deployment Settings ### 1. Customization of the server root path (custom Proxy base url) From 4849df03ff551f38653e0704a9bc5ac9dec8e879 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:46:55 -0700 Subject: [PATCH 176/655] add list fine tune endpoints --- litellm/fine_tuning/main.py | 127 ++++++++++++++++++++++ litellm/llms/openai_fine_tuning/openai.py | 51 +++++++++ 2 files changed, 178 insertions(+) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index e5f2a4555..8bb9bf1a5 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -300,3 +300,130 @@ def cancel_fine_tuning_job( return response except Exception as e: raise e + + +async def alist_fine_tuning_jobs( + after: Optional[str] = None, + limit: Optional[int] = None, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> FineTuningJob: + """ + Async: List your organization's fine-tuning jobs + """ + try: + loop = asyncio.get_event_loop() + kwargs["alist_fine_tuning_jobs"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + cancel_fine_tuning_job, + after, + limit, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + return response + except Exception as e: + raise e + + +def list_fine_tuning_jobs( + after: Optional[str] = None, + limit: Optional[int] = None, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +): + """ + List your organization's fine-tuning jobs + + Params: + + - after: Optional[str] = None, Identifier for the last job from the previous pagination request. + - limit: Optional[int] = None, Number of fine-tuning jobs to retrieve. 
Defaults to 20 + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("alist_fine_tuning_jobs", False) is True + + response = openai_fine_tuning_instance.list_fine_tuning_jobs( + api_base=api_base, + api_key=api_key, + organization=organization, + after=after, + limit=limit, + timeout=timeout, + max_retries=optional_params.max_retries, + _is_async=_is_async, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py index 91924edab..ee79483b7 100644 --- a/litellm/llms/openai_fine_tuning/openai.py +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -2,6 +2,7 @@ from typing import Any, Coroutine, Optional, Union import httpx from openai import AsyncOpenAI, OpenAI +from openai.pagination import AsyncCursorPage from openai.types.fine_tuning import FineTuningJob from litellm._logging import verbose_logger @@ -144,3 +145,53 @@ class OpenAIFineTuningAPI(BaseLLM): fine_tuning_job_id=fine_tuning_job_id ) return response + + async def alist_fine_tuning_jobs( + self, + openai_client: AsyncOpenAI, + after: Optional[str] = None, + limit: Optional[int] = None, + ): + response = await openai_client.fine_tuning.jobs.list(after=after, limit=limit) + return response + + def list_fine_tuning_jobs( + self, + _is_async: bool, + api_key: Optional[str], + api_base: Optional[str], + timeout: Union[float, httpx.Timeout], + max_retries: Optional[int], + organization: Optional[str], + client: Optional[Union[OpenAI, AsyncOpenAI]] = None, + after: Optional[str] = None, + limit: Optional[int] = None, + ): + openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client( + api_key=api_key, + api_base=api_base, + timeout=timeout, + max_retries=max_retries, + organization=organization, + client=client, + _is_async=_is_async, + ) + if openai_client is None: + raise ValueError( + "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment." + ) + + if _is_async is True: + if not isinstance(openai_client, AsyncOpenAI): + raise ValueError( + "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client." 
+ ) + return self.alist_fine_tuning_jobs( # type: ignore + after=after, + limit=limit, + openai_client=openai_client, + ) + verbose_logger.debug("list fine tuning job, after= %s, limit= %s", after, limit) + response = openai_client.fine_tuning.jobs.list(after=after, limit=limit) + return response + pass From 106626f2248b8219f34887489f773b6621568563 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:47:14 -0700 Subject: [PATCH 177/655] test - list_fine_tuning_jobs --- litellm/tests/test_fine_tuning_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py index d4935cb0d..4a3922697 100644 --- a/litellm/tests/test_fine_tuning_api.py +++ b/litellm/tests/test_fine_tuning_api.py @@ -41,6 +41,12 @@ def test_create_fine_tune_job(): assert create_fine_tuning_response.id is not None assert create_fine_tuning_response.model == "gpt-3.5-turbo-0125" + # list fine tuning jobs + print("listing ft jobs") + ft_jobs = litellm.list_fine_tuning_jobs(limit=2) + print("response from litellm.list_fine_tuning_jobs=", ft_jobs) + assert len(ft_jobs) > 0 + # delete file litellm.file_delete( From c9bea3a879913700e99dbe897a00ba2275705bfc Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 19:52:14 -0700 Subject: [PATCH 178/655] test - async ft jobs --- litellm/fine_tuning/main.py | 2 +- litellm/llms/openai_fine_tuning/openai.py | 4 +- litellm/tests/test_fine_tuning_api.py | 51 ++++++++++++++++++++++- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index 8bb9bf1a5..b41ced1b9 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -319,7 +319,7 @@ async def alist_fine_tuning_jobs( # Use a partial function to pass your keyword arguments func = partial( - cancel_fine_tuning_job, + list_fine_tuning_jobs, after, limit, custom_llm_provider, diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py index ee79483b7..b955b9ce8 100644 --- a/litellm/llms/openai_fine_tuning/openai.py +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -53,7 +53,9 @@ class OpenAIFineTuningAPI(BaseLLM): create_fine_tuning_job_data: FineTuningJobCreate, openai_client: AsyncOpenAI, ) -> FineTuningJob: - response = await openai_client.batches.create(**create_fine_tuning_job_data) + response = await openai_client.fine_tuning.jobs.create( + **create_fine_tuning_job_data + ) return response def create_fine_tuning_job( diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py index 4a3922697..b7e3c957c 100644 --- a/litellm/tests/test_fine_tuning_api.py +++ b/litellm/tests/test_fine_tuning_api.py @@ -45,7 +45,8 @@ def test_create_fine_tune_job(): print("listing ft jobs") ft_jobs = litellm.list_fine_tuning_jobs(limit=2) print("response from litellm.list_fine_tuning_jobs=", ft_jobs) - assert len(ft_jobs) > 0 + + assert len(list(ft_jobs)) > 0 # delete file @@ -63,3 +64,51 @@ def test_create_fine_tune_job(): assert response.status == "cancelled" assert response.id == create_fine_tuning_response.id pass + + +@pytest.mark.asyncio +async def test_create_fine_tune_jobs_async(): + verbose_logger.setLevel(logging.DEBUG) + file_name = "openai_batch_completions.jsonl" + _current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(_current_dir, file_name) + + file_obj = await litellm.acreate_file( + file=open(file_path, "rb"), + purpose="fine-tune", + 
custom_llm_provider="openai", + ) + print("Response from creating file=", file_obj) + + create_fine_tuning_response = await litellm.acreate_fine_tuning_job( + model="gpt-3.5-turbo-0125", + training_file=file_obj.id, + ) + + print("response from litellm.create_fine_tuning_job=", create_fine_tuning_response) + + assert create_fine_tuning_response.id is not None + assert create_fine_tuning_response.model == "gpt-3.5-turbo-0125" + + # list fine tuning jobs + print("listing ft jobs") + ft_jobs = await litellm.alist_fine_tuning_jobs(limit=2) + print("response from litellm.list_fine_tuning_jobs=", ft_jobs) + assert len(list(ft_jobs)) > 0 + + # delete file + + await litellm.afile_delete( + file_id=file_obj.id, + ) + + # cancel ft job + response = await litellm.acancel_fine_tuning_job( + fine_tuning_job_id=create_fine_tuning_response.id, + ) + + print("response from litellm.cancel_fine_tuning_job=", response) + + assert response.status == "cancelled" + assert response.id == create_fine_tuning_response.id + pass From 6abc49c6117d5a169bdba8490520c44d8dcbc2b8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 20:01:12 -0700 Subject: [PATCH 179/655] fix linting --- litellm/types/llms/openai.py | 39 ++++++++++++++---------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index fcff8b4ba..d6dbb8f5f 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -458,15 +458,11 @@ class ChatCompletionUsageBlock(TypedDict): class Hyperparameters(TypedDict): - batch_size: Optional[Union[str, int]] = Field( - default="auto", description="Number of examples in each batch." - ) - learning_rate_multiplier: Optional[Union[str, float]] = Field( - default="auto", description="Scaling factor for the learning rate." - ) - n_epochs: Optional[Union[str, int]] = Field( - default="auto", description="The number of epochs to train the model for." - ) + batch_size: Optional[Union[str, int]] # "Number of examples in each batch." + learning_rate_multiplier: Optional[ + Union[str, float] + ] # Scaling factor for the learning rate + n_epochs: Optional[Union[str, int]] # "The number of epochs to train the model for" class FineTuningJobCreate(TypedDict): @@ -498,21 +494,16 @@ class FineTuningJobCreate(TypedDict): hyperparameters: Optional[Hyperparameters] = Field( default={}, description="The hyperparameters used for the fine-tuning job." ) - suffix: Optional[str] = Field( - default=None, - description="A string of up to 18 characters that will be added to your fine-tuned model name.", - ) - validation_file: Optional[str] = Field( - default=None, - description="The ID of an uploaded file that contains validation data.", - ) - integrations: Optional[List[str]] = Field( - default=None, - description="A list of integrations to enable for your fine-tuning job.", - ) - seed: Optional[int] = Field( - default=None, description="The seed controls the reproducibility of the job." - ) + suffix: Optional[ + str + ] # "A string of up to 18 characters that will be added to your fine-tuned model name." + validation_file: Optional[ + str + ] # "The ID of an uploaded file that contains validation data." + integrations: Optional[ + List[str] + ] # "A list of integrations to enable for your fine-tuning job." + seed: Optional[int] # "The seed controls the reproducibility of the job." 
class Config: allow_population_by_field_name = True From f18827cbc01125d459fa657a84aed9ea23f8be01 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 20:10:03 -0700 Subject: [PATCH 180/655] fix type errors --- litellm/fine_tuning/main.py | 4 +-- litellm/llms/openai_fine_tuning/openai.py | 8 +++--- litellm/types/llms/openai.py | 30 ++++------------------- 3 files changed, 11 insertions(+), 31 deletions(-) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index b41ced1b9..eb5c7d4a4 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -34,7 +34,7 @@ openai_fine_tuning_instance = OpenAIFineTuningAPI() async def acreate_fine_tuning_job( model: str, training_file: str, - hyperparameters: Optional[Hyperparameters] = {}, + hyperparameters: Optional[Hyperparameters] = {}, # type: ignore suffix: Optional[str] = None, validation_file: Optional[str] = None, integrations: Optional[List[str]] = None, @@ -84,7 +84,7 @@ async def acreate_fine_tuning_job( def create_fine_tuning_job( model: str, training_file: str, - hyperparameters: Optional[Hyperparameters] = {}, + hyperparameters: Optional[Hyperparameters] = {}, # type: ignore suffix: Optional[str] = None, validation_file: Optional[str] = None, integrations: Optional[List[str]] = None, diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py index b955b9ce8..d81ed3760 100644 --- a/litellm/llms/openai_fine_tuning/openai.py +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -54,7 +54,7 @@ class OpenAIFineTuningAPI(BaseLLM): openai_client: AsyncOpenAI, ) -> FineTuningJob: response = await openai_client.fine_tuning.jobs.create( - **create_fine_tuning_job_data + **create_fine_tuning_job_data # type: ignore ) return response @@ -68,7 +68,7 @@ class OpenAIFineTuningAPI(BaseLLM): max_retries: Optional[int], organization: Optional[str], client: Optional[Union[OpenAI, AsyncOpenAI]] = None, - ) -> Union[Coroutine[Any, Any, FineTuningJob]]: + ) -> Union[FineTuningJob, Union[Coroutine[Any, Any, FineTuningJob]]]: openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client( api_key=api_key, api_base=api_base, @@ -154,7 +154,7 @@ class OpenAIFineTuningAPI(BaseLLM): after: Optional[str] = None, limit: Optional[int] = None, ): - response = await openai_client.fine_tuning.jobs.list(after=after, limit=limit) + response = await openai_client.fine_tuning.jobs.list(after=after, limit=limit) # type: ignore return response def list_fine_tuning_jobs( @@ -194,6 +194,6 @@ class OpenAIFineTuningAPI(BaseLLM): openai_client=openai_client, ) verbose_logger.debug("list fine tuning job, after= %s, limit= %s", after, limit) - response = openai_client.fine_tuning.jobs.list(after=after, limit=limit) + response = openai_client.fine_tuning.jobs.list(after=after, limit=limit) # type: ignore return response pass diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index d6dbb8f5f..396e58e99 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -487,13 +487,11 @@ class FineTuningJobCreate(TypedDict): ``` """ - model: str = Field(..., description="The name of the model to fine-tune.") - training_file: str = Field( - ..., description="The ID of an uploaded file that contains training data." - ) - hyperparameters: Optional[Hyperparameters] = Field( - default={}, description="The hyperparameters used for the fine-tuning job." - ) + model: str # "The name of the model to fine-tune." 
+ training_file: str # "The ID of an uploaded file that contains training data." + hyperparameters: Optional[ + Hyperparameters + ] # "The hyperparameters used for the fine-tuning job." suffix: Optional[ str ] # "A string of up to 18 characters that will be added to your fine-tuned model name." @@ -504,21 +502,3 @@ class FineTuningJobCreate(TypedDict): List[str] ] # "A list of integrations to enable for your fine-tuning job." seed: Optional[int] # "The seed controls the reproducibility of the job." - - class Config: - allow_population_by_field_name = True - schema_extra = { - "example": { - "model": "gpt-3.5-turbo", - "training_file": "file-abc123", - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": 0.1, - "n_epochs": 3, - }, - "suffix": "custom-model-name", - "validation_file": "file-xyz789", - "integrations": ["slack"], - "seed": 42, - } - } From dff8163f2cc3e25a037a8283a383c687382f53d9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 20:10:33 -0700 Subject: [PATCH 181/655] fix type errors --- litellm/llms/openai_fine_tuning/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/openai_fine_tuning/openai.py b/litellm/llms/openai_fine_tuning/openai.py index d81ed3760..2f6d89ea0 100644 --- a/litellm/llms/openai_fine_tuning/openai.py +++ b/litellm/llms/openai_fine_tuning/openai.py @@ -95,7 +95,7 @@ class OpenAIFineTuningAPI(BaseLLM): verbose_logger.debug( "creating fine tuning job, args= %s", create_fine_tuning_job_data ) - response = openai_client.fine_tuning.jobs.create(**create_fine_tuning_job_data) + response = openai_client.fine_tuning.jobs.create(**create_fine_tuning_job_data) # type: ignore return response async def acancel_fine_tuning_job( From 19d57314ee0b066b9ed15d06b0f85204a3ef87f2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 20:14:45 -0700 Subject: [PATCH 182/655] fix inc langfuse flish time --- litellm/tests/test_alangfuse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index bf475ae97..c9e98d62d 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -222,7 +222,7 @@ async def test_langfuse_logging_without_request_response(stream, langfuse_client print(chunk) langfuse_client.flush() - await asyncio.sleep(2) + await asyncio.sleep(5) # get trace with _unique_trace_name trace = langfuse_client.get_generations(trace_id=_unique_trace_name) @@ -273,7 +273,7 @@ async def test_langfuse_logging_audio_transcriptions(langfuse_client): ) langfuse_client.flush() - await asyncio.sleep(2) + await asyncio.sleep(5) # get trace with _unique_trace_name trace = langfuse_client.get_trace(id=_unique_trace_name) From 7d5022b0e0a34e8013063caf0d5f929de3bff4d3 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 29 Jul 2024 21:16:47 -0700 Subject: [PATCH 183/655] docs(scheduler.md): update docs with request timeout --- docs/my-website/docs/scheduler.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/my-website/docs/scheduler.md b/docs/my-website/docs/scheduler.md index e7943c459..8329fc2ad 100644 --- a/docs/my-website/docs/scheduler.md +++ b/docs/my-website/docs/scheduler.md @@ -147,6 +147,9 @@ model_list: mock_response: "hello world!" 
api_key: my-good-key +litellm_settings: + request_timeout: 600 # 👈 Will keep retrying until timeout occurs + router_settings: redis_host; os.environ/REDIS_HOST redis_password: os.environ/REDIS_PASSWORD From 6f34998cab6b7418c5f4024f84817fc6c71bb4d3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 29 Jul 2024 21:20:25 -0700 Subject: [PATCH 184/655] ui new build --- litellm/proxy/_experimental/out/404.html | 1 + .../_buildManifest.js | 0 .../_ssgManifest.js | 0 .../static/chunks/131-19b05e5ce40fa85d.js | 8 --- .../static/chunks/131-6a03368053f9d26d.js | 8 +++ ...f2a46f911d5.js => 759-83a8bdddfe32b5d9.js} | 4 +- .../static/chunks/777-71e19aabdac85a01.js | 1 + .../static/chunks/777-bcd4fbd0638662f5.js | 1 - ...6e2cde4d07.js => page-3c4dcad891da09b7.js} | 0 ...cdcfdbe5ac.js => page-5e1163825ccf7b7a.js} | 0 .../chunks/app/page-77bb32690a1a0f37.js | 1 - .../chunks/app/page-cab98591303c1c5d.js | 1 + litellm/proxy/_experimental/out/index.html | 2 +- litellm/proxy/_experimental/out/index.txt | 4 +- .../proxy/_experimental/out/model_hub.html | 1 + litellm/proxy/_experimental/out/model_hub.txt | 4 +- .../proxy/_experimental/out/onboarding.html | 1 + .../proxy/_experimental/out/onboarding.txt | 4 +- ui/litellm-dashboard/out/404.html | 2 +- .../_buildManifest.js | 0 .../_ssgManifest.js | 0 .../static/chunks/131-19b05e5ce40fa85d.js | 8 --- .../static/chunks/131-6a03368053f9d26d.js | 8 +++ ...f2a46f911d5.js => 759-83a8bdddfe32b5d9.js} | 4 +- .../static/chunks/777-71e19aabdac85a01.js | 1 + .../static/chunks/777-bcd4fbd0638662f5.js | 1 - ...6e2cde4d07.js => page-3c4dcad891da09b7.js} | 0 ...cdcfdbe5ac.js => page-5e1163825ccf7b7a.js} | 0 .../chunks/app/page-77bb32690a1a0f37.js | 1 - .../chunks/app/page-cab98591303c1c5d.js | 1 + ui/litellm-dashboard/out/index.html | 2 +- ui/litellm-dashboard/out/index.txt | 4 +- ui/litellm-dashboard/out/model_hub.html | 2 +- ui/litellm-dashboard/out/model_hub.txt | 4 +- ui/litellm-dashboard/out/onboarding.html | 2 +- ui/litellm-dashboard/out/onboarding.txt | 4 +- .../src/components/networking.tsx | 57 ++++--------------- 37 files changed, 54 insertions(+), 88 deletions(-) create mode 100644 litellm/proxy/_experimental/out/404.html rename litellm/proxy/_experimental/out/_next/static/{GTNnv1QAXCqc2TmAz4qqc => InOW9_FWGxjcBmhCsnjat}/_buildManifest.js (100%) rename litellm/proxy/_experimental/out/_next/static/{GTNnv1QAXCqc2TmAz4qqc => InOW9_FWGxjcBmhCsnjat}/_ssgManifest.js (100%) delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/131-6a03368053f9d26d.js rename litellm/proxy/_experimental/out/_next/static/chunks/{759-d7572f2a46f911d5.js => 759-83a8bdddfe32b5d9.js} (53%) create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/777-71e19aabdac85a01.js delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/777-bcd4fbd0638662f5.js rename litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/{page-6575356e2cde4d07.js => page-3c4dcad891da09b7.js} (100%) rename litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/{page-c73480cdcfdbe5ac.js => page-5e1163825ccf7b7a.js} (100%) delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/app/page-77bb32690a1a0f37.js create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/app/page-cab98591303c1c5d.js create mode 100644 litellm/proxy/_experimental/out/model_hub.html create mode 100644 litellm/proxy/_experimental/out/onboarding.html rename 
ui/litellm-dashboard/out/_next/static/{GTNnv1QAXCqc2TmAz4qqc => InOW9_FWGxjcBmhCsnjat}/_buildManifest.js (100%) rename ui/litellm-dashboard/out/_next/static/{GTNnv1QAXCqc2TmAz4qqc => InOW9_FWGxjcBmhCsnjat}/_ssgManifest.js (100%) delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/131-19b05e5ce40fa85d.js create mode 100644 ui/litellm-dashboard/out/_next/static/chunks/131-6a03368053f9d26d.js rename ui/litellm-dashboard/out/_next/static/chunks/{759-d7572f2a46f911d5.js => 759-83a8bdddfe32b5d9.js} (53%) create mode 100644 ui/litellm-dashboard/out/_next/static/chunks/777-71e19aabdac85a01.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/777-bcd4fbd0638662f5.js rename ui/litellm-dashboard/out/_next/static/chunks/app/model_hub/{page-6575356e2cde4d07.js => page-3c4dcad891da09b7.js} (100%) rename ui/litellm-dashboard/out/_next/static/chunks/app/onboarding/{page-c73480cdcfdbe5ac.js => page-5e1163825ccf7b7a.js} (100%) delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/page-77bb32690a1a0f37.js create mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/page-cab98591303c1c5d.js diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html new file mode 100644 index 000000000..1bf101ffe --- /dev/null +++ b/litellm/proxy/_experimental/out/404.html @@ -0,0 +1 @@ +404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/GTNnv1QAXCqc2TmAz4qqc/_buildManifest.js b/litellm/proxy/_experimental/out/_next/static/InOW9_FWGxjcBmhCsnjat/_buildManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/GTNnv1QAXCqc2TmAz4qqc/_buildManifest.js rename to litellm/proxy/_experimental/out/_next/static/InOW9_FWGxjcBmhCsnjat/_buildManifest.js diff --git a/litellm/proxy/_experimental/out/_next/static/GTNnv1QAXCqc2TmAz4qqc/_ssgManifest.js b/litellm/proxy/_experimental/out/_next/static/InOW9_FWGxjcBmhCsnjat/_ssgManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/GTNnv1QAXCqc2TmAz4qqc/_ssgManifest.js rename to litellm/proxy/_experimental/out/_next/static/InOW9_FWGxjcBmhCsnjat/_ssgManifest.js diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js b/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js deleted file mode 100644 index 92aaed0dd..000000000 --- a/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js +++ /dev/null @@ -1,8 +0,0 @@ -"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[131],{84174:function(e,t,n){n.d(t,{Z:function(){return s}});var a=n(14749),r=n(64090),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M832 64H296c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h496v688c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8V96c0-17.7-14.3-32-32-32zM704 192H192c-17.7 0-32 14.3-32 32v530.7c0 8.5 3.4 16.6 9.4 22.6l173.3 173.3c2.2 2.2 4.7 4 7.4 5.5v1.9h4.2c3.5 1.3 7.2 2 11 2H704c17.7 0 32-14.3 32-32V224c0-17.7-14.3-32-32-32zM350 856.2L263.9 770H350v86.2zM664 888H414V746c0-22.1-17.9-40-40-40H232V264h432v624z"}}]},name:"copy",theme:"outlined"},o=n(60688),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},50459:function(e,t,n){n.d(t,{Z:function(){return s}});var a=n(14749),r=n(64090),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M765.7 486.8L314.9 134.7A7.97 7.97 0 00302 141v77.3c0 4.9 2.3 9.6 6.1 12.6l360 281.1-360 281.1c-3.9 3-6.1 7.7-6.1 12.6V883c0 6.7 7.7 10.4 12.9 6.3l450.8-352.1a31.96 31.96 0 000-50.4z"}}]},name:"right",theme:"outlined"},o=n(60688),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},92836:function(e,t,n){n.d(t,{Z:function(){return p}});var a=n(69703),r=n(80991),i=n(2898),o=n(99250),s=n(65492),l=n(64090),c=n(41608),d=n(50027);n(18174),n(21871),n(41213);let u=(0,s.fn)("Tab"),p=l.forwardRef((e,t)=>{let{icon:n,className:p,children:g}=e,m=(0,a._T)(e,["icon","className","children"]),b=(0,l.useContext)(c.O),f=(0,l.useContext)(d.Z);return l.createElement(r.O,Object.assign({ref:t,className:(0,o.q)(u("root"),"flex whitespace-nowrap truncate max-w-xs outline-none focus:ring-0 text-tremor-default transition duration-100",f?(0,s.bM)(f,i.K.text).selectTextColor:"solid"===b?"ui-selected:text-tremor-content-emphasis dark:ui-selected:text-dark-tremor-content-emphasis":"ui-selected:text-tremor-brand dark:ui-selected:text-dark-tremor-brand",function(e,t){switch(e){case"line":return(0,o.q)("ui-selected:border-b-2 hover:border-b-2 border-transparent transition duration-100 -mb-px px-2 py-2","hover:border-tremor-content hover:text-tremor-content-emphasis text-tremor-content","dark:hover:border-dark-tremor-content-emphasis dark:hover:text-dark-tremor-content-emphasis 
dark:text-dark-tremor-content",t?(0,s.bM)(t,i.K.border).selectBorderColor:"ui-selected:border-tremor-brand dark:ui-selected:border-dark-tremor-brand");case"solid":return(0,o.q)("border-transparent border rounded-tremor-small px-2.5 py-1","ui-selected:border-tremor-border ui-selected:bg-tremor-background ui-selected:shadow-tremor-input hover:text-tremor-content-emphasis ui-selected:text-tremor-brand","dark:ui-selected:border-dark-tremor-border dark:ui-selected:bg-dark-tremor-background dark:ui-selected:shadow-dark-tremor-input dark:hover:text-dark-tremor-content-emphasis dark:ui-selected:text-dark-tremor-brand",t?(0,s.bM)(t,i.K.text).selectTextColor:"text-tremor-content dark:text-dark-tremor-content")}}(b,f),p)},m),n?l.createElement(n,{className:(0,o.q)(u("icon"),"flex-none h-5 w-5",g?"mr-2":"")}):null,g?l.createElement("span",null,g):null)});p.displayName="Tab"},26734:function(e,t,n){n.d(t,{Z:function(){return c}});var a=n(69703),r=n(80991),i=n(99250),o=n(65492),s=n(64090);let l=(0,o.fn)("TabGroup"),c=s.forwardRef((e,t)=>{let{defaultIndex:n,index:o,onIndexChange:c,children:d,className:u}=e,p=(0,a._T)(e,["defaultIndex","index","onIndexChange","children","className"]);return s.createElement(r.O.Group,Object.assign({as:"div",ref:t,defaultIndex:n,selectedIndex:o,onChange:c,className:(0,i.q)(l("root"),"w-full",u)},p),d)});c.displayName="TabGroup"},41608:function(e,t,n){n.d(t,{O:function(){return c},Z:function(){return u}});var a=n(69703),r=n(64090),i=n(50027);n(18174),n(21871),n(41213);var o=n(80991),s=n(99250);let l=(0,n(65492).fn)("TabList"),c=(0,r.createContext)("line"),d={line:(0,s.q)("flex border-b space-x-4","border-tremor-border","dark:border-dark-tremor-border"),solid:(0,s.q)("inline-flex p-0.5 rounded-tremor-default space-x-1.5","bg-tremor-background-subtle","dark:bg-dark-tremor-background-subtle")},u=r.forwardRef((e,t)=>{let{color:n,variant:u="line",children:p,className:g}=e,m=(0,a._T)(e,["color","variant","children","className"]);return r.createElement(o.O.List,Object.assign({ref:t,className:(0,s.q)(l("root"),"justify-start overflow-x-clip",d[u],g)},m),r.createElement(c.Provider,{value:u},r.createElement(i.Z.Provider,{value:n},p)))});u.displayName="TabList"},32126:function(e,t,n){n.d(t,{Z:function(){return d}});var a=n(69703);n(50027);var r=n(18174);n(21871);var i=n(41213),o=n(99250),s=n(65492),l=n(64090);let c=(0,s.fn)("TabPanel"),d=l.forwardRef((e,t)=>{let{children:n,className:s}=e,d=(0,a._T)(e,["children","className"]),{selectedValue:u}=(0,l.useContext)(i.Z),p=u===(0,l.useContext)(r.Z);return l.createElement("div",Object.assign({ref:t,className:(0,o.q)(c("root"),"w-full mt-2",p?"":"hidden",s),"aria-selected":p?"true":"false"},d),n)});d.displayName="TabPanel"},23682:function(e,t,n){n.d(t,{Z:function(){return u}});var a=n(69703),r=n(80991);n(50027);var i=n(18174);n(21871);var o=n(41213),s=n(99250),l=n(65492),c=n(64090);let d=(0,l.fn)("TabPanels"),u=c.forwardRef((e,t)=>{let{children:n,className:l}=e,u=(0,a._T)(e,["children","className"]);return c.createElement(r.O.Panels,Object.assign({as:"div",ref:t,className:(0,s.q)(d("root"),"w-full",l)},u),e=>{let{selectedIndex:t}=e;return c.createElement(o.Z.Provider,{value:{selectedValue:t}},c.Children.map(n,(e,t)=>c.createElement(i.Z.Provider,{value:t},e)))})});u.displayName="TabPanels"},50027:function(e,t,n){n.d(t,{Z:function(){return i}});var a=n(64090),r=n(54942);n(99250);let i=(0,a.createContext)(r.fr.Blue)},18174:function(e,t,n){n.d(t,{Z:function(){return a}});let 
a=(0,n(64090).createContext)(0)},21871:function(e,t,n){n.d(t,{Z:function(){return a}});let a=(0,n(64090).createContext)(void 0)},41213:function(e,t,n){n.d(t,{Z:function(){return a}});let a=(0,n(64090).createContext)({selectedValue:void 0,handleValueChange:void 0})},21467:function(e,t,n){n.d(t,{i:function(){return s}});var a=n(64090),r=n(44329),i=n(54165),o=n(57499);function s(e){return t=>a.createElement(i.ZP,{theme:{token:{motion:!1,zIndexPopupBase:0}}},a.createElement(e,Object.assign({},t)))}t.Z=(e,t,n,i)=>s(s=>{let{prefixCls:l,style:c}=s,d=a.useRef(null),[u,p]=a.useState(0),[g,m]=a.useState(0),[b,f]=(0,r.Z)(!1,{value:s.open}),{getPrefixCls:E}=a.useContext(o.E_),h=E(t||"select",l);a.useEffect(()=>{if(f(!0),"undefined"!=typeof ResizeObserver){let e=new ResizeObserver(e=>{let t=e[0].target;p(t.offsetHeight+8),m(t.offsetWidth)}),t=setInterval(()=>{var a;let r=n?".".concat(n(h)):".".concat(h,"-dropdown"),i=null===(a=d.current)||void 0===a?void 0:a.querySelector(r);i&&(clearInterval(t),e.observe(i))},10);return()=>{clearInterval(t),e.disconnect()}}},[]);let S=Object.assign(Object.assign({},s),{style:Object.assign(Object.assign({},c),{margin:0}),open:b,visible:b,getPopupContainer:()=>d.current});return i&&(S=i(S)),a.createElement("div",{ref:d,style:{paddingBottom:u,position:"relative",minWidth:g}},a.createElement(e,Object.assign({},S)))})},99129:function(e,t,n){let a;n.d(t,{Z:function(){return eY}});var r=n(63787),i=n(64090),o=n(37274),s=n(57499),l=n(54165),c=n(99537),d=n(77136),u=n(20653),p=n(40388),g=n(16480),m=n.n(g),b=n(51761),f=n(47387),E=n(70595),h=n(24750),S=n(89211),y=n(1861),T=n(51350),A=e=>{let{type:t,children:n,prefixCls:a,buttonProps:r,close:o,autoFocus:s,emitEvent:l,isSilent:c,quitOnNullishReturnValue:d,actionFn:u}=e,p=i.useRef(!1),g=i.useRef(null),[m,b]=(0,S.Z)(!1),f=function(){null==o||o.apply(void 0,arguments)};i.useEffect(()=>{let e=null;return s&&(e=setTimeout(()=>{var e;null===(e=g.current)||void 0===e||e.focus()})),()=>{e&&clearTimeout(e)}},[]);let E=e=>{e&&e.then&&(b(!0),e.then(function(){b(!1,!0),f.apply(void 0,arguments),p.current=!1},e=>{if(b(!1,!0),p.current=!1,null==c||!c())return Promise.reject(e)}))};return i.createElement(y.ZP,Object.assign({},(0,T.nx)(t),{onClick:e=>{let t;if(!p.current){if(p.current=!0,!u){f();return}if(l){var n;if(t=u(e),d&&!((n=t)&&n.then)){p.current=!1,f(e);return}}else if(u.length)t=u(o),p.current=!1;else if(!(t=u())){f();return}E(t)}},loading:m,prefixCls:a},r,{ref:g}),n)};let R=i.createContext({}),{Provider:I}=R;var N=()=>{let{autoFocusButton:e,cancelButtonProps:t,cancelTextLocale:n,isSilent:a,mergedOkCancel:r,rootPrefixCls:o,close:s,onCancel:l,onConfirm:c}=(0,i.useContext)(R);return r?i.createElement(A,{isSilent:a,actionFn:l,close:function(){null==s||s.apply(void 0,arguments),null==c||c(!1)},autoFocus:"cancel"===e,buttonProps:t,prefixCls:"".concat(o,"-btn")},n):null},_=()=>{let{autoFocusButton:e,close:t,isSilent:n,okButtonProps:a,rootPrefixCls:r,okTextLocale:o,okType:s,onConfirm:l,onOk:c}=(0,i.useContext)(R);return i.createElement(A,{isSilent:n,type:s||"primary",actionFn:c,close:function(){null==t||t.apply(void 0,arguments),null==l||l(!0)},autoFocus:"ok"===e,buttonProps:a,prefixCls:"".concat(r,"-btn")},o)},v=n(81303),w=n(14749),k=n(80406),C=n(88804),O=i.createContext({}),x=n(5239),L=n(31506),D=n(91010),P=n(4295),M=n(72480);function F(e,t,n){var a=t;return!a&&n&&(a="".concat(e,"-").concat(n)),a}function U(e,t){var n=e["page".concat(t?"Y":"X","Offset")],a="scroll".concat(t?"Top":"Left");if("number"!=typeof n){var 
r=e.document;"number"!=typeof(n=r.documentElement[a])&&(n=r.body[a])}return n}var B=n(49367),G=n(74084),$=i.memo(function(e){return e.children},function(e,t){return!t.shouldUpdate}),H={width:0,height:0,overflow:"hidden",outline:"none"},z=i.forwardRef(function(e,t){var n,a,r,o=e.prefixCls,s=e.className,l=e.style,c=e.title,d=e.ariaId,u=e.footer,p=e.closable,g=e.closeIcon,b=e.onClose,f=e.children,E=e.bodyStyle,h=e.bodyProps,S=e.modalRender,y=e.onMouseDown,T=e.onMouseUp,A=e.holderRef,R=e.visible,I=e.forceRender,N=e.width,_=e.height,v=e.classNames,k=e.styles,C=i.useContext(O).panel,L=(0,G.x1)(A,C),D=(0,i.useRef)(),P=(0,i.useRef)();i.useImperativeHandle(t,function(){return{focus:function(){var e;null===(e=D.current)||void 0===e||e.focus()},changeActive:function(e){var t=document.activeElement;e&&t===P.current?D.current.focus():e||t!==D.current||P.current.focus()}}});var M={};void 0!==N&&(M.width=N),void 0!==_&&(M.height=_),u&&(n=i.createElement("div",{className:m()("".concat(o,"-footer"),null==v?void 0:v.footer),style:(0,x.Z)({},null==k?void 0:k.footer)},u)),c&&(a=i.createElement("div",{className:m()("".concat(o,"-header"),null==v?void 0:v.header),style:(0,x.Z)({},null==k?void 0:k.header)},i.createElement("div",{className:"".concat(o,"-title"),id:d},c))),p&&(r=i.createElement("button",{type:"button",onClick:b,"aria-label":"Close",className:"".concat(o,"-close")},g||i.createElement("span",{className:"".concat(o,"-close-x")})));var F=i.createElement("div",{className:m()("".concat(o,"-content"),null==v?void 0:v.content),style:null==k?void 0:k.content},r,a,i.createElement("div",(0,w.Z)({className:m()("".concat(o,"-body"),null==v?void 0:v.body),style:(0,x.Z)((0,x.Z)({},E),null==k?void 0:k.body)},h),f),n);return i.createElement("div",{key:"dialog-element",role:"dialog","aria-labelledby":c?d:null,"aria-modal":"true",ref:L,style:(0,x.Z)((0,x.Z)({},l),M),className:m()(o,s),onMouseDown:y,onMouseUp:T},i.createElement("div",{tabIndex:0,ref:D,style:H,"aria-hidden":"true"}),i.createElement($,{shouldUpdate:R||I},S?S(F):F),i.createElement("div",{tabIndex:0,ref:P,style:H,"aria-hidden":"true"}))}),j=i.forwardRef(function(e,t){var n=e.prefixCls,a=e.title,r=e.style,o=e.className,s=e.visible,l=e.forceRender,c=e.destroyOnClose,d=e.motionName,u=e.ariaId,p=e.onVisibleChanged,g=e.mousePosition,b=(0,i.useRef)(),f=i.useState(),E=(0,k.Z)(f,2),h=E[0],S=E[1],y={};function T(){var e,t,n,a,r,i=(n={left:(t=(e=b.current).getBoundingClientRect()).left,top:t.top},r=(a=e.ownerDocument).defaultView||a.parentWindow,n.left+=U(r),n.top+=U(r,!0),n);S(g?"".concat(g.x-i.left,"px ").concat(g.y-i.top,"px"):"")}return h&&(y.transformOrigin=h),i.createElement(B.ZP,{visible:s,onVisibleChanged:p,onAppearPrepare:T,onEnterPrepare:T,forceRender:l,motionName:d,removeOnLeave:c,ref:b},function(s,l){var c=s.className,d=s.style;return i.createElement(z,(0,w.Z)({},e,{ref:t,title:a,ariaId:u,prefixCls:n,holderRef:l,style:(0,x.Z)((0,x.Z)((0,x.Z)({},d),r),y),className:m()(o,c)}))})});function V(e){var t=e.prefixCls,n=e.style,a=e.visible,r=e.maskProps,o=e.motionName,s=e.className;return i.createElement(B.ZP,{key:"mask",visible:a,motionName:o,leavedClassName:"".concat(t,"-mask-hidden")},function(e,a){var o=e.className,l=e.style;return i.createElement("div",(0,w.Z)({ref:a,style:(0,x.Z)((0,x.Z)({},l),n),className:m()("".concat(t,"-mask"),o,s)},r))})}function W(e){var t=e.prefixCls,n=void 0===t?"rc-dialog":t,a=e.zIndex,r=e.visible,o=void 0!==r&&r,s=e.keyboard,l=void 0===s||s,c=e.focusTriggerAfterClose,d=void 
0===c||c,u=e.wrapStyle,p=e.wrapClassName,g=e.wrapProps,b=e.onClose,f=e.afterOpenChange,E=e.afterClose,h=e.transitionName,S=e.animation,y=e.closable,T=e.mask,A=void 0===T||T,R=e.maskTransitionName,I=e.maskAnimation,N=e.maskClosable,_=e.maskStyle,v=e.maskProps,C=e.rootClassName,O=e.classNames,U=e.styles,B=(0,i.useRef)(),G=(0,i.useRef)(),$=(0,i.useRef)(),H=i.useState(o),z=(0,k.Z)(H,2),W=z[0],q=z[1],Y=(0,D.Z)();function K(e){null==b||b(e)}var Z=(0,i.useRef)(!1),X=(0,i.useRef)(),Q=null;return(void 0===N||N)&&(Q=function(e){Z.current?Z.current=!1:G.current===e.target&&K(e)}),(0,i.useEffect)(function(){o&&(q(!0),(0,L.Z)(G.current,document.activeElement)||(B.current=document.activeElement))},[o]),(0,i.useEffect)(function(){return function(){clearTimeout(X.current)}},[]),i.createElement("div",(0,w.Z)({className:m()("".concat(n,"-root"),C)},(0,M.Z)(e,{data:!0})),i.createElement(V,{prefixCls:n,visible:A&&o,motionName:F(n,R,I),style:(0,x.Z)((0,x.Z)({zIndex:a},_),null==U?void 0:U.mask),maskProps:v,className:null==O?void 0:O.mask}),i.createElement("div",(0,w.Z)({tabIndex:-1,onKeyDown:function(e){if(l&&e.keyCode===P.Z.ESC){e.stopPropagation(),K(e);return}o&&e.keyCode===P.Z.TAB&&$.current.changeActive(!e.shiftKey)},className:m()("".concat(n,"-wrap"),p,null==O?void 0:O.wrapper),ref:G,onClick:Q,style:(0,x.Z)((0,x.Z)((0,x.Z)({zIndex:a},u),null==U?void 0:U.wrapper),{},{display:W?null:"none"})},g),i.createElement(j,(0,w.Z)({},e,{onMouseDown:function(){clearTimeout(X.current),Z.current=!0},onMouseUp:function(){X.current=setTimeout(function(){Z.current=!1})},ref:$,closable:void 0===y||y,ariaId:Y,prefixCls:n,visible:o&&W,onClose:K,onVisibleChanged:function(e){if(e)!function(){if(!(0,L.Z)(G.current,document.activeElement)){var e;null===(e=$.current)||void 0===e||e.focus()}}();else{if(q(!1),A&&B.current&&d){try{B.current.focus({preventScroll:!0})}catch(e){}B.current=null}W&&(null==E||E())}null==f||f(e)},motionName:F(n,h,S)}))))}j.displayName="Content",n(53850);var q=function(e){var t=e.visible,n=e.getContainer,a=e.forceRender,r=e.destroyOnClose,o=void 0!==r&&r,s=e.afterClose,l=e.panelRef,c=i.useState(t),d=(0,k.Z)(c,2),u=d[0],p=d[1],g=i.useMemo(function(){return{panel:l}},[l]);return(i.useEffect(function(){t&&p(!0)},[t]),a||!o||u)?i.createElement(O.Provider,{value:g},i.createElement(C.Z,{open:t||a||u,autoDestroy:!1,getContainer:n,autoLock:t||u},i.createElement(W,(0,w.Z)({},e,{destroyOnClose:o,afterClose:function(){null==s||s(),p(!1)}})))):null};q.displayName="Dialog";var Y=function(e,t,n){let a=arguments.length>3&&void 0!==arguments[3]?arguments[3]:i.createElement(v.Z,null),r=arguments.length>4&&void 0!==arguments[4]&&arguments[4];if("boolean"==typeof e?!e:void 0===t?!r:!1===t||null===t)return[!1,null];let o="boolean"==typeof t||null==t?a:t;return[!0,n?n(o):o]},K=n(22127),Z=n(86718),X=n(47137),Q=n(92801),J=n(48563);function ee(){}let et=i.createContext({add:ee,remove:ee});var en=n(17094),ea=()=>{let{cancelButtonProps:e,cancelTextLocale:t,onCancel:n}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({onClick:n},e),t)},er=()=>{let{confirmLoading:e,okButtonProps:t,okType:n,okTextLocale:a,onOk:r}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({},(0,T.nx)(n),{loading:e,onClick:r},t),a)},ei=n(4678);function eo(e,t){return i.createElement("span",{className:"".concat(e,"-close-x")},t||i.createElement(v.Z,{className:"".concat(e,"-close-icon")}))}let es=e=>{let 
t;let{okText:n,okType:a="primary",cancelText:o,confirmLoading:s,onOk:l,onCancel:c,okButtonProps:d,cancelButtonProps:u,footer:p}=e,[g]=(0,E.Z)("Modal",(0,ei.A)()),m={confirmLoading:s,okButtonProps:d,cancelButtonProps:u,okTextLocale:n||(null==g?void 0:g.okText),cancelTextLocale:o||(null==g?void 0:g.cancelText),okType:a,onOk:l,onCancel:c},b=i.useMemo(()=>m,(0,r.Z)(Object.values(m)));return"function"==typeof p||void 0===p?(t=i.createElement(i.Fragment,null,i.createElement(ea,null),i.createElement(er,null)),"function"==typeof p&&(t=p(t,{OkBtn:er,CancelBtn:ea})),t=i.createElement(I,{value:b},t)):t=p,i.createElement(en.n,{disabled:!1},t)};var el=n(11303),ec=n(13703),ed=n(58854),eu=n(80316),ep=n(76585),eg=n(8985);function em(e){return{position:e,inset:0}}let eb=e=>{let{componentCls:t,antCls:n}=e;return[{["".concat(t,"-root")]:{["".concat(t).concat(n,"-zoom-enter, ").concat(t).concat(n,"-zoom-appear")]:{transform:"none",opacity:0,animationDuration:e.motionDurationSlow,userSelect:"none"},["".concat(t).concat(n,"-zoom-leave ").concat(t,"-content")]:{pointerEvents:"none"},["".concat(t,"-mask")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,height:"100%",backgroundColor:e.colorBgMask,pointerEvents:"none",["".concat(t,"-hidden")]:{display:"none"}}),["".concat(t,"-wrap")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,overflow:"auto",outline:0,WebkitOverflowScrolling:"touch",["&:has(".concat(t).concat(n,"-zoom-enter), &:has(").concat(t).concat(n,"-zoom-appear)")]:{pointerEvents:"none"}})}},{["".concat(t,"-root")]:(0,ec.J$)(e)}]},ef=e=>{let{componentCls:t}=e;return[{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl"},["".concat(t,"-centered")]:{textAlign:"center","&::before":{display:"inline-block",width:0,height:"100%",verticalAlign:"middle",content:'""'},[t]:{top:0,display:"inline-block",paddingBottom:0,textAlign:"start",verticalAlign:"middle"}},["@media (max-width: ".concat(e.screenSMMax,"px)")]:{[t]:{maxWidth:"calc(100vw - 16px)",margin:"".concat((0,eg.bf)(e.marginXS)," auto")},["".concat(t,"-centered")]:{[t]:{flex:1}}}}},{[t]:Object.assign(Object.assign({},(0,el.Wf)(e)),{pointerEvents:"none",position:"relative",top:100,width:"auto",maxWidth:"calc(100vw - ".concat((0,eg.bf)(e.calc(e.margin).mul(2).equal()),")"),margin:"0 auto",paddingBottom:e.paddingLG,["".concat(t,"-title")]:{margin:0,color:e.titleColor,fontWeight:e.fontWeightStrong,fontSize:e.titleFontSize,lineHeight:e.titleLineHeight,wordWrap:"break-word"},["".concat(t,"-content")]:{position:"relative",backgroundColor:e.contentBg,backgroundClip:"padding-box",border:0,borderRadius:e.borderRadiusLG,boxShadow:e.boxShadow,pointerEvents:"auto",padding:e.contentPadding},["".concat(t,"-close")]:Object.assign({position:"absolute",top:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),insetInlineEnd:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),zIndex:e.calc(e.zIndexPopupBase).add(10).equal(),padding:0,color:e.modalCloseIconColor,fontWeight:e.fontWeightStrong,lineHeight:1,textDecoration:"none",background:"transparent",borderRadius:e.borderRadiusSM,width:e.modalCloseBtnSize,height:e.modalCloseBtnSize,border:0,outline:0,cursor:"pointer",transition:"color ".concat(e.motionDurationMid,", background-color 
").concat(e.motionDurationMid),"&-x":{display:"flex",fontSize:e.fontSizeLG,fontStyle:"normal",lineHeight:"".concat((0,eg.bf)(e.modalCloseBtnSize)),justifyContent:"center",textTransform:"none",textRendering:"auto"},"&:hover":{color:e.modalIconHoverColor,backgroundColor:e.closeBtnHoverBg,textDecoration:"none"},"&:active":{backgroundColor:e.closeBtnActiveBg}},(0,el.Qy)(e)),["".concat(t,"-header")]:{color:e.colorText,background:e.headerBg,borderRadius:"".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)," 0 0"),marginBottom:e.headerMarginBottom,padding:e.headerPadding,borderBottom:e.headerBorderBottom},["".concat(t,"-body")]:{fontSize:e.fontSize,lineHeight:e.lineHeight,wordWrap:"break-word",padding:e.bodyPadding},["".concat(t,"-footer")]:{textAlign:"end",background:e.footerBg,marginTop:e.footerMarginTop,padding:e.footerPadding,borderTop:e.footerBorderTop,borderRadius:e.footerBorderRadius,["> ".concat(e.antCls,"-btn + ").concat(e.antCls,"-btn")]:{marginInlineStart:e.marginXS}},["".concat(t,"-open")]:{overflow:"hidden"}})},{["".concat(t,"-pure-panel")]:{top:"auto",padding:0,display:"flex",flexDirection:"column",["".concat(t,"-content,\n ").concat(t,"-body,\n ").concat(t,"-confirm-body-wrapper")]:{display:"flex",flexDirection:"column",flex:"auto"},["".concat(t,"-confirm-body")]:{marginBottom:"auto"}}}]},eE=e=>{let{componentCls:t}=e;return{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl",["".concat(t,"-confirm-body")]:{direction:"rtl"}}}}},eh=e=>{let t=e.padding,n=e.fontSizeHeading5,a=e.lineHeightHeading5;return(0,eu.TS)(e,{modalHeaderHeight:e.calc(e.calc(a).mul(n).equal()).add(e.calc(t).mul(2).equal()).equal(),modalFooterBorderColorSplit:e.colorSplit,modalFooterBorderStyle:e.lineType,modalFooterBorderWidth:e.lineWidth,modalIconHoverColor:e.colorIconHover,modalCloseIconColor:e.colorIcon,modalCloseBtnSize:e.fontHeight,modalConfirmIconSize:e.fontHeight,modalTitleHeight:e.calc(e.titleFontSize).mul(e.titleLineHeight).equal()})},eS=e=>({footerBg:"transparent",headerBg:e.colorBgElevated,titleLineHeight:e.lineHeightHeading5,titleFontSize:e.fontSizeHeading5,contentBg:e.colorBgElevated,titleColor:e.colorTextHeading,closeBtnHoverBg:e.wireframe?"transparent":e.colorFillContent,closeBtnActiveBg:e.wireframe?"transparent":e.colorFillContentHover,contentPadding:e.wireframe?0:"".concat((0,eg.bf)(e.paddingMD)," ").concat((0,eg.bf)(e.paddingContentHorizontalLG)),headerPadding:e.wireframe?"".concat((0,eg.bf)(e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,headerBorderBottom:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",headerMarginBottom:e.wireframe?0:e.marginXS,bodyPadding:e.wireframe?e.paddingLG:0,footerPadding:e.wireframe?"".concat((0,eg.bf)(e.paddingXS)," ").concat((0,eg.bf)(e.padding)):0,footerBorderTop:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",footerBorderRadius:e.wireframe?"0 0 ".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)):0,footerMarginTop:e.wireframe?0:e.marginSM,confirmBodyPadding:e.wireframe?"".concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,confirmIconMarginInlineEnd:e.wireframe?e.margin:e.marginSM,confirmBtnsMarginTop:e.wireframe?e.marginLG:e.marginSM});var ey=(0,ep.I$)("Modal",e=>{let t=eh(e);return[ef(t),eE(t),eb(t),(0,ed._y)(t,"zoom")]},eS,{unitless:{titleLineHeight:!0}}),eT=n(92935),eA=function(e,t){var n={};for(var a in 
e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};(0,K.Z)()&&window.document.documentElement&&document.documentElement.addEventListener("click",e=>{a={x:e.pageX,y:e.pageY},setTimeout(()=>{a=null},100)},!0);var eR=e=>{var t;let{getPopupContainer:n,getPrefixCls:r,direction:o,modal:l}=i.useContext(s.E_),c=t=>{let{onCancel:n}=e;null==n||n(t)},{prefixCls:d,className:u,rootClassName:p,open:g,wrapClassName:E,centered:h,getContainer:S,closeIcon:y,closable:T,focusTriggerAfterClose:A=!0,style:R,visible:I,width:N=520,footer:_,classNames:w,styles:k}=e,C=eA(e,["prefixCls","className","rootClassName","open","wrapClassName","centered","getContainer","closeIcon","closable","focusTriggerAfterClose","style","visible","width","footer","classNames","styles"]),O=r("modal",d),x=r(),L=(0,eT.Z)(O),[D,P,M]=ey(O,L),F=m()(E,{["".concat(O,"-centered")]:!!h,["".concat(O,"-wrap-rtl")]:"rtl"===o}),U=null!==_&&i.createElement(es,Object.assign({},e,{onOk:t=>{let{onOk:n}=e;null==n||n(t)},onCancel:c})),[B,G]=Y(T,y,e=>eo(O,e),i.createElement(v.Z,{className:"".concat(O,"-close-icon")}),!0),$=function(e){let t=i.useContext(et),n=i.useRef();return(0,J.zX)(a=>{if(a){let r=e?a.querySelector(e):a;t.add(r),n.current=r}else t.remove(n.current)})}(".".concat(O,"-content")),[H,z]=(0,b.Cn)("Modal",C.zIndex);return D(i.createElement(Q.BR,null,i.createElement(X.Ux,{status:!0,override:!0},i.createElement(Z.Z.Provider,{value:z},i.createElement(q,Object.assign({width:N},C,{zIndex:H,getContainer:void 0===S?n:S,prefixCls:O,rootClassName:m()(P,p,M,L),footer:U,visible:null!=g?g:I,mousePosition:null!==(t=C.mousePosition)&&void 0!==t?t:a,onClose:c,closable:B,closeIcon:G,focusTriggerAfterClose:A,transitionName:(0,f.m)(x,"zoom",e.transitionName),maskTransitionName:(0,f.m)(x,"fade",e.maskTransitionName),className:m()(P,u,null==l?void 0:l.className),style:Object.assign(Object.assign({},null==l?void 0:l.style),R),classNames:Object.assign(Object.assign({wrapper:F},null==l?void 0:l.classNames),w),styles:Object.assign(Object.assign({},null==l?void 0:l.styles),k),panelRef:$}))))))};let eI=e=>{let{componentCls:t,titleFontSize:n,titleLineHeight:a,modalConfirmIconSize:r,fontSize:i,lineHeight:o,modalTitleHeight:s,fontHeight:l,confirmBodyPadding:c}=e,d="".concat(t,"-confirm");return{[d]:{"&-rtl":{direction:"rtl"},["".concat(e.antCls,"-modal-header")]:{display:"none"},["".concat(d,"-body-wrapper")]:Object.assign({},(0,el.dF)()),["&".concat(t," ").concat(t,"-body")]:{padding:c},["".concat(d,"-body")]:{display:"flex",flexWrap:"nowrap",alignItems:"start",["> ".concat(e.iconCls)]:{flex:"none",fontSize:r,marginInlineEnd:e.confirmIconMarginInlineEnd,marginTop:e.calc(e.calc(l).sub(r).equal()).div(2).equal()},["&-has-title > ".concat(e.iconCls)]:{marginTop:e.calc(e.calc(s).sub(r).equal()).div(2).equal()}},["".concat(d,"-paragraph")]:{display:"flex",flexDirection:"column",flex:"auto",rowGap:e.marginXS,maxWidth:"calc(100% - ".concat((0,eg.bf)(e.calc(e.modalConfirmIconSize).add(e.marginSM).equal()),")")},["".concat(d,"-title")]:{color:e.colorTextHeading,fontWeight:e.fontWeightStrong,fontSize:n,lineHeight:a},["".concat(d,"-content")]:{color:e.colorText,fontSize:i,lineHeight:o},["".concat(d,"-btns")]:{textAlign:"end",marginTop:e.confirmBtnsMarginTop,["".concat(e.antCls,"-btn + 
").concat(e.antCls,"-btn")]:{marginBottom:0,marginInlineStart:e.marginXS}}},["".concat(d,"-error ").concat(d,"-body > ").concat(e.iconCls)]:{color:e.colorError},["".concat(d,"-warning ").concat(d,"-body > ").concat(e.iconCls,",\n ").concat(d,"-confirm ").concat(d,"-body > ").concat(e.iconCls)]:{color:e.colorWarning},["".concat(d,"-info ").concat(d,"-body > ").concat(e.iconCls)]:{color:e.colorInfo},["".concat(d,"-success ").concat(d,"-body > ").concat(e.iconCls)]:{color:e.colorSuccess}}};var eN=(0,ep.bk)(["Modal","confirm"],e=>[eI(eh(e))],eS,{order:-1e3}),e_=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};function ev(e){let{prefixCls:t,icon:n,okText:a,cancelText:o,confirmPrefixCls:s,type:l,okCancel:g,footer:b,locale:f}=e,h=e_(e,["prefixCls","icon","okText","cancelText","confirmPrefixCls","type","okCancel","footer","locale"]),S=n;if(!n&&null!==n)switch(l){case"info":S=i.createElement(p.Z,null);break;case"success":S=i.createElement(c.Z,null);break;case"error":S=i.createElement(d.Z,null);break;default:S=i.createElement(u.Z,null)}let y=null!=g?g:"confirm"===l,T=null!==e.autoFocusButton&&(e.autoFocusButton||"ok"),[A]=(0,E.Z)("Modal"),R=f||A,v=a||(y?null==R?void 0:R.okText:null==R?void 0:R.justOkText),w=Object.assign({autoFocusButton:T,cancelTextLocale:o||(null==R?void 0:R.cancelText),okTextLocale:v,mergedOkCancel:y},h),k=i.useMemo(()=>w,(0,r.Z)(Object.values(w))),C=i.createElement(i.Fragment,null,i.createElement(N,null),i.createElement(_,null)),O=void 0!==e.title&&null!==e.title,x="".concat(s,"-body");return i.createElement("div",{className:"".concat(s,"-body-wrapper")},i.createElement("div",{className:m()(x,{["".concat(x,"-has-title")]:O})},S,i.createElement("div",{className:"".concat(s,"-paragraph")},O&&i.createElement("span",{className:"".concat(s,"-title")},e.title),i.createElement("div",{className:"".concat(s,"-content")},e.content))),void 0===b||"function"==typeof b?i.createElement(I,{value:k},i.createElement("div",{className:"".concat(s,"-btns")},"function"==typeof b?b(C,{OkBtn:_,CancelBtn:N}):C)):b,i.createElement(eN,{prefixCls:t}))}let ew=e=>{let{close:t,zIndex:n,afterClose:a,open:r,keyboard:o,centered:s,getContainer:l,maskStyle:c,direction:d,prefixCls:u,wrapClassName:p,rootPrefixCls:g,bodyStyle:E,closable:S=!1,closeIcon:y,modalRender:T,focusTriggerAfterClose:A,onConfirm:R,styles:I}=e,N="".concat(u,"-confirm"),_=e.width||416,v=e.style||{},w=void 0===e.mask||e.mask,k=void 0!==e.maskClosable&&e.maskClosable,C=m()(N,"".concat(N,"-").concat(e.type),{["".concat(N,"-rtl")]:"rtl"===d},e.className),[,O]=(0,h.ZP)(),x=i.useMemo(()=>void 0!==n?n:O.zIndexPopupBase+b.u6,[n,O]);return i.createElement(eR,{prefixCls:u,className:C,wrapClassName:m()({["".concat(N,"-centered")]:!!e.centered},p),onCancel:()=>{null==t||t({triggerCancel:!0}),null==R||R(!1)},open:r,title:"",footer:null,transitionName:(0,f.m)(g||"","zoom",e.transitionName),maskTransitionName:(0,f.m)(g||"","fade",e.maskTransitionName),mask:w,maskClosable:k,style:v,styles:Object.assign({body:E,mask:c},I),width:_,zIndex:x,afterClose:a,keyboard:o,centered:s,getContainer:l,closable:S,closeIcon:y,modalRender:T,focusTriggerAfterClose:A},i.createElement(ev,Object.assign({},e,{confirmPrefixCls:N})))};var ek=e=>{let{rootPrefixCls:t,iconPrefixCls:n,direction:a,theme:r}=e;return 
i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:n,direction:a,theme:r},i.createElement(ew,Object.assign({},e)))},eC=[];let eO="",ex=e=>{var t,n;let{prefixCls:a,getContainer:r,direction:o}=e,l=(0,ei.A)(),c=(0,i.useContext)(s.E_),d=eO||c.getPrefixCls(),u=a||"".concat(d,"-modal"),p=r;return!1===p&&(p=void 0),i.createElement(ek,Object.assign({},e,{rootPrefixCls:d,prefixCls:u,iconPrefixCls:c.iconPrefixCls,theme:c.theme,direction:null!=o?o:c.direction,locale:null!==(n=null===(t=c.locale)||void 0===t?void 0:t.Modal)&&void 0!==n?n:l,getContainer:p}))};function eL(e){let t;let n=(0,l.w6)(),a=document.createDocumentFragment(),s=Object.assign(Object.assign({},e),{close:u,open:!0});function c(){for(var t=arguments.length,n=Array(t),i=0;ie&&e.triggerCancel);e.onCancel&&s&&e.onCancel.apply(e,[()=>{}].concat((0,r.Z)(n.slice(1))));for(let e=0;e{let t=n.getPrefixCls(void 0,eO),r=n.getIconPrefixCls(),s=n.getTheme(),c=i.createElement(ex,Object.assign({},e));(0,o.s)(i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:r,theme:s},n.holderRender?n.holderRender(c):c),a)})}function u(){for(var t=arguments.length,n=Array(t),a=0;a{"function"==typeof e.afterClose&&e.afterClose(),c.apply(this,n)}})).visible&&delete s.visible,d(s)}return d(s),eC.push(u),{destroy:u,update:function(e){d(s="function"==typeof e?e(s):Object.assign(Object.assign({},s),e))}}}function eD(e){return Object.assign(Object.assign({},e),{type:"warning"})}function eP(e){return Object.assign(Object.assign({},e),{type:"info"})}function eM(e){return Object.assign(Object.assign({},e),{type:"success"})}function eF(e){return Object.assign(Object.assign({},e),{type:"error"})}function eU(e){return Object.assign(Object.assign({},e),{type:"confirm"})}var eB=n(21467),eG=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},e$=(0,eB.i)(e=>{let{prefixCls:t,className:n,closeIcon:a,closable:r,type:o,title:l,children:c,footer:d}=e,u=eG(e,["prefixCls","className","closeIcon","closable","type","title","children","footer"]),{getPrefixCls:p}=i.useContext(s.E_),g=p(),b=t||p("modal"),f=(0,eT.Z)(g),[E,h,S]=ey(b,f),y="".concat(b,"-confirm"),T={};return T=o?{closable:null!=r&&r,title:"",footer:"",children:i.createElement(ev,Object.assign({},e,{prefixCls:b,confirmPrefixCls:y,rootPrefixCls:g,content:c}))}:{closable:null==r||r,title:l,footer:null!==d&&i.createElement(es,Object.assign({},e)),children:c},E(i.createElement(z,Object.assign({prefixCls:b,className:m()(h,"".concat(b,"-pure-panel"),o&&y,o&&"".concat(y,"-").concat(o),n,S,f)},u,{closeIcon:eo(b,a),closable:r},T)))}),eH=n(79474),ez=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},ej=i.forwardRef((e,t)=>{var n,{afterClose:a,config:o}=e,l=ez(e,["afterClose","config"]);let[c,d]=i.useState(!0),[u,p]=i.useState(o),{direction:g,getPrefixCls:m}=i.useContext(s.E_),b=m("modal"),f=m(),h=function(){d(!1);for(var 
e=arguments.length,t=Array(e),n=0;ne&&e.triggerCancel);u.onCancel&&a&&u.onCancel.apply(u,[()=>{}].concat((0,r.Z)(t.slice(1))))};i.useImperativeHandle(t,()=>({destroy:h,update:e=>{p(t=>Object.assign(Object.assign({},t),e))}}));let S=null!==(n=u.okCancel)&&void 0!==n?n:"confirm"===u.type,[y]=(0,E.Z)("Modal",eH.Z.Modal);return i.createElement(ek,Object.assign({prefixCls:b,rootPrefixCls:f},u,{close:h,open:c,afterClose:()=>{var e;a(),null===(e=u.afterClose)||void 0===e||e.call(u)},okText:u.okText||(S?null==y?void 0:y.okText:null==y?void 0:y.justOkText),direction:u.direction||g,cancelText:u.cancelText||(null==y?void 0:y.cancelText)},l))});let eV=0,eW=i.memo(i.forwardRef((e,t)=>{let[n,a]=function(){let[e,t]=i.useState([]);return[e,i.useCallback(e=>(t(t=>[].concat((0,r.Z)(t),[e])),()=>{t(t=>t.filter(t=>t!==e))}),[])]}();return i.useImperativeHandle(t,()=>({patchElement:a}),[]),i.createElement(i.Fragment,null,n)}));function eq(e){return eL(eD(e))}eR.useModal=function(){let e=i.useRef(null),[t,n]=i.useState([]);i.useEffect(()=>{t.length&&((0,r.Z)(t).forEach(e=>{e()}),n([]))},[t]);let a=i.useCallback(t=>function(a){var o;let s,l;eV+=1;let c=i.createRef(),d=new Promise(e=>{s=e}),u=!1,p=i.createElement(ej,{key:"modal-".concat(eV),config:t(a),ref:c,afterClose:()=>{null==l||l()},isSilent:()=>u,onConfirm:e=>{s(e)}});return(l=null===(o=e.current)||void 0===o?void 0:o.patchElement(p))&&eC.push(l),{destroy:()=>{function e(){var e;null===(e=c.current)||void 0===e||e.destroy()}c.current?e():n(t=>[].concat((0,r.Z)(t),[e]))},update:e=>{function t(){var t;null===(t=c.current)||void 0===t||t.update(e)}c.current?t():n(e=>[].concat((0,r.Z)(e),[t]))},then:e=>(u=!0,d.then(e))}},[]);return[i.useMemo(()=>({info:a(eP),success:a(eM),error:a(eF),warning:a(eD),confirm:a(eU)}),[]),i.createElement(eW,{key:"modal-holder",ref:e})]},eR.info=function(e){return eL(eP(e))},eR.success=function(e){return eL(eM(e))},eR.error=function(e){return eL(eF(e))},eR.warning=eq,eR.warn=eq,eR.confirm=function(e){return eL(eU(e))},eR.destroyAll=function(){for(;eC.length;){let e=eC.pop();e&&e()}},eR.config=function(e){let{rootPrefixCls:t}=e;eO=t},eR._InternalPanelDoNotUseOrYouWillBeFired=e$;var eY=eR},13703:function(e,t,n){n.d(t,{J$:function(){return s}});var a=n(8985),r=n(59353);let i=new a.E4("antFadeIn",{"0%":{opacity:0},"100%":{opacity:1}}),o=new a.E4("antFadeOut",{"0%":{opacity:1},"100%":{opacity:0}}),s=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],{antCls:n}=e,a="".concat(n,"-fade"),s=t?"&":"";return[(0,r.R)(a,i,o,e.motionDurationMid,t),{["\n ".concat(s).concat(a,"-enter,\n ").concat(s).concat(a,"-appear\n ")]:{opacity:0,animationTimingFunction:"linear"},["".concat(s).concat(a,"-leave")]:{animationTimingFunction:"linear"}}]}},44056:function(e){e.exports=function(e,n){for(var a,r,i,o=e||"",s=n||"div",l={},c=0;c4&&m.slice(0,4)===o&&s.test(t)&&("-"===t.charAt(4)?b=o+(n=t.slice(5).replace(l,u)).charAt(0).toUpperCase()+n.slice(1):(g=(p=t).slice(4),t=l.test(g)?p:("-"!==(g=g.replace(c,d)).charAt(0)&&(g="-"+g),o+g)),f=r),new f(b,t))};var s=/^data[-\w.:]+$/i,l=/-[a-z]/g,c=/[A-Z]/g;function d(e){return"-"+e.toLowerCase()}function u(e){return e.charAt(1).toUpperCase()}},31872:function(e,t,n){var a=n(96130),r=n(64730),i=n(61861),o=n(46982),s=n(83671),l=n(53618);e.exports=a([i,r,o,s,l])},83671:function(e,t,n){var 
a=n(7667),r=n(13585),i=a.booleanish,o=a.number,s=a.spaceSeparated;e.exports=r({transform:function(e,t){return"role"===t?t:"aria-"+t.slice(4).toLowerCase()},properties:{ariaActiveDescendant:null,ariaAtomic:i,ariaAutoComplete:null,ariaBusy:i,ariaChecked:i,ariaColCount:o,ariaColIndex:o,ariaColSpan:o,ariaControls:s,ariaCurrent:null,ariaDescribedBy:s,ariaDetails:null,ariaDisabled:i,ariaDropEffect:s,ariaErrorMessage:null,ariaExpanded:i,ariaFlowTo:s,ariaGrabbed:i,ariaHasPopup:null,ariaHidden:i,ariaInvalid:null,ariaKeyShortcuts:null,ariaLabel:null,ariaLabelledBy:s,ariaLevel:o,ariaLive:null,ariaModal:i,ariaMultiLine:i,ariaMultiSelectable:i,ariaOrientation:null,ariaOwns:s,ariaPlaceholder:null,ariaPosInSet:o,ariaPressed:i,ariaReadOnly:i,ariaRelevant:null,ariaRequired:i,ariaRoleDescription:s,ariaRowCount:o,ariaRowIndex:o,ariaRowSpan:o,ariaSelected:i,ariaSetSize:o,ariaSort:null,ariaValueMax:o,ariaValueMin:o,ariaValueNow:o,ariaValueText:null,role:null}})},53618:function(e,t,n){var a=n(7667),r=n(13585),i=n(46640),o=a.boolean,s=a.overloadedBoolean,l=a.booleanish,c=a.number,d=a.spaceSeparated,u=a.commaSeparated;e.exports=r({space:"html",attributes:{acceptcharset:"accept-charset",classname:"class",htmlfor:"for",httpequiv:"http-equiv"},transform:i,mustUseProperty:["checked","multiple","muted","selected"],properties:{abbr:null,accept:u,acceptCharset:d,accessKey:d,action:null,allow:null,allowFullScreen:o,allowPaymentRequest:o,allowUserMedia:o,alt:null,as:null,async:o,autoCapitalize:null,autoComplete:d,autoFocus:o,autoPlay:o,capture:o,charSet:null,checked:o,cite:null,className:d,cols:c,colSpan:null,content:null,contentEditable:l,controls:o,controlsList:d,coords:c|u,crossOrigin:null,data:null,dateTime:null,decoding:null,default:o,defer:o,dir:null,dirName:null,disabled:o,download:s,draggable:l,encType:null,enterKeyHint:null,form:null,formAction:null,formEncType:null,formMethod:null,formNoValidate:o,formTarget:null,headers:d,height:c,hidden:o,high:c,href:null,hrefLang:null,htmlFor:d,httpEquiv:d,id:null,imageSizes:null,imageSrcSet:u,inputMode:null,integrity:null,is:null,isMap:o,itemId:null,itemProp:d,itemRef:d,itemScope:o,itemType:d,kind:null,label:null,lang:null,language:null,list:null,loading:null,loop:o,low:c,manifest:null,max:null,maxLength:c,media:null,method:null,min:null,minLength:c,multiple:o,muted:o,name:null,nonce:null,noModule:o,noValidate:o,onAbort:null,onAfterPrint:null,onAuxClick:null,onBeforePrint:null,onBeforeUnload:null,onBlur:null,onCancel:null,onCanPlay:null,onCanPlayThrough:null,onChange:null,onClick:null,onClose:null,onContextMenu:null,onCopy:null,onCueChange:null,onCut:null,onDblClick:null,onDrag:null,onDragEnd:null,onDragEnter:null,onDragExit:null,onDragLeave:null,onDragOver:null,onDragStart:null,onDrop:null,onDurationChange:null,onEmptied:null,onEnded:null,onError:null,onFocus:null,onFormData:null,onHashChange:null,onInput:null,onInvalid:null,onKeyDown:null,onKeyPress:null,onKeyUp:null,onLanguageChange:null,onLoad:null,onLoadedData:null,onLoadedMetadata:null,onLoadEnd:null,onLoadStart:null,onMessage:null,onMessageError:null,onMouseDown:null,onMouseEnter:null,onMouseLeave:null,onMouseMove:null,onMouseOut:null,onMouseOver:null,onMouseUp:null,onOffline:null,onOnline:null,onPageHide:null,onPageShow:null,onPaste:null,onPause:null,onPlay:null,onPlaying:null,onPopState:null,onProgress:null,onRateChange:null,onRejectionHandled:null,onReset:null,onResize:null,onScroll:null,onSecurityPolicyViolation:null,onSeeked:null,onSeeking:null,onSelect:null,onSlotChange:null,onStalled:null,onStorage:null,onSubmit
:null,onSuspend:null,onTimeUpdate:null,onToggle:null,onUnhandledRejection:null,onUnload:null,onVolumeChange:null,onWaiting:null,onWheel:null,open:o,optimum:c,pattern:null,ping:d,placeholder:null,playsInline:o,poster:null,preload:null,readOnly:o,referrerPolicy:null,rel:d,required:o,reversed:o,rows:c,rowSpan:c,sandbox:d,scope:null,scoped:o,seamless:o,selected:o,shape:null,size:c,sizes:null,slot:null,span:c,spellCheck:l,src:null,srcDoc:null,srcLang:null,srcSet:u,start:c,step:null,style:null,tabIndex:c,target:null,title:null,translate:null,type:null,typeMustMatch:o,useMap:null,value:l,width:c,wrap:null,align:null,aLink:null,archive:d,axis:null,background:null,bgColor:null,border:c,borderColor:null,bottomMargin:c,cellPadding:null,cellSpacing:null,char:null,charOff:null,classId:null,clear:null,code:null,codeBase:null,codeType:null,color:null,compact:o,declare:o,event:null,face:null,frame:null,frameBorder:null,hSpace:c,leftMargin:c,link:null,longDesc:null,lowSrc:null,marginHeight:c,marginWidth:c,noResize:o,noHref:o,noShade:o,noWrap:o,object:null,profile:null,prompt:null,rev:null,rightMargin:c,rules:null,scheme:null,scrolling:l,standby:null,summary:null,text:null,topMargin:c,valueType:null,version:null,vAlign:null,vLink:null,vSpace:c,allowTransparency:null,autoCorrect:null,autoSave:null,disablePictureInPicture:o,disableRemotePlayback:o,prefix:null,property:null,results:c,security:null,unselectable:null}})},46640:function(e,t,n){var a=n(25852);e.exports=function(e,t){return a(e,t.toLowerCase())}},25852:function(e){e.exports=function(e,t){return t in e?e[t]:t}},13585:function(e,t,n){var a=n(39900),r=n(94949),i=n(7478);e.exports=function(e){var t,n,o=e.space,s=e.mustUseProperty||[],l=e.attributes||{},c=e.properties,d=e.transform,u={},p={};for(t in c)n=new i(t,d(l,t),c[t],o),-1!==s.indexOf(t)&&(n.mustUseProperty=!0),u[t]=n,p[a(t)]=t,p[a(n.attribute)]=t;return new r(u,p,o)}},7478:function(e,t,n){var a=n(74108),r=n(7667);e.exports=s,s.prototype=new a,s.prototype.defined=!0;var i=["boolean","booleanish","overloadedBoolean","number","commaSeparated","spaceSeparated","commaOrSpaceSeparated"],o=i.length;function s(e,t,n,s){var l,c,d,u=-1;for(s&&(this.space=s),a.call(this,e,t);++u