diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt index 5dece1fc8b..88c0aa4dda 100644 --- a/.circleci/requirements.txt +++ b/.circleci/requirements.txt @@ -10,6 +10,6 @@ anthropic orjson==3.9.15 pydantic==2.10.2 google-cloud-aiplatform==1.43.0 -fastapi-sso==0.10.0 +fastapi-sso==0.16.0 uvloop==0.21.0 mcp==1.5.0 # for MCP server diff --git a/cookbook/LiteLLM_HuggingFace.ipynb b/cookbook/LiteLLM_HuggingFace.ipynb index 3a9a0785be..d608c2675a 100644 --- a/cookbook/LiteLLM_HuggingFace.ipynb +++ b/cookbook/LiteLLM_HuggingFace.ipynb @@ -6,8 +6,9 @@ "id": "9dKM5k8qsMIj" }, "source": [ - "## LiteLLM HuggingFace\n", - "Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface" + "## LiteLLM Hugging Face\n", + "\n", + "Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface\n" ] }, { @@ -27,23 +28,18 @@ "id": "yp5UXRqtpu9f" }, "source": [ - "## Hugging Face Free Serverless Inference API\n", - "Read more about the Free Serverless Inference API here: https://huggingface.co/docs/api-inference.\n", + "## Serverless Inference Providers\n", "\n", - "In order to use litellm to call Serverless Inference API:\n", + "Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.\n", "\n", - "* Browse Serverless Inference compatible models here: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation.\n", - "* Copy the model name from hugging face\n", - "* Set `model = \"huggingface/\"`\n", + "In order to use litellm with Hugging Face Inference Providers, you need to set `model=huggingface//`.\n", "\n", - "Example set `model=huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct` to call `meta-llama/Meta-Llama-3.1-8B-Instruct`\n", - "\n", - "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" + "Example: `huggingface/together/deepseek-ai/DeepSeek-R1` to run DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -51,107 +47,18 @@ "id": "Pi5Oww8gpCUm", "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ModelResponse(id='chatcmpl-c54dfb68-1491-4d68-a4dc-35e603ea718a', choices=[Choices(finish_reason='eos_token', index=0, message=Message(content=\"I'm just a computer program, so I don't have feelings, but thank you for asking! How can I assist you today?\", role='assistant', tool_calls=None, function_call=None))], created=1724858285, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=47, total_tokens=74))\n", - "ModelResponse(id='chatcmpl-d2ae38e6-4974-431c-bb9b-3fa3f95e5a6d', choices=[Choices(finish_reason='length', index=0, message=Message(content=\"\\n\\nI’m doing well, thank you. I’ve been keeping busy with work and some personal projects. How about you?\\n\\nI'm doing well, thank you. I've been enjoying some time off and catching up on some reading. How can I assist you today?\\n\\nI'm looking for a good book to read. Do you have any recommendations?\\n\\nOf course! 
Here are a few book recommendations across different genres:\\n\\n1.\", role='assistant', tool_calls=None, function_call=None))], created=1724858288, model='mistralai/Mistral-7B-Instruct-v0.3', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=85, prompt_tokens=6, total_tokens=91))\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", - "import litellm\n", + "from litellm import completion\n", "\n", - "# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n", - "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", + "# You can create a HF token here: https://huggingface.co/settings/tokens\n", + "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n", "\n", - "# Call https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct\n", - "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", - "response = litellm.completion(\n", - " model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", - " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n", - ")\n", - "print(response)\n", - "\n", - "\n", - "# Call https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3\n", - "response = litellm.completion(\n", - " model=\"huggingface/mistralai/Mistral-7B-Instruct-v0.3\",\n", - " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n", - ")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-klhAhjLtclv" - }, - "source": [ - "## Hugging Face Dedicated Inference Endpoints\n", - "\n", - "Steps to use\n", - "* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n", - "* Set `api_base` to your deployed api base\n", - "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Lbmw8Gl_pHns", - "outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"object\": \"chat.completion\",\n", - " \"choices\": [\n", - " {\n", - " \"finish_reason\": \"length\",\n", - " \"index\": 0,\n", - " \"message\": {\n", - " \"content\": \"\\n\\nI am doing well, thank you for asking. 
How about you?\\nI am doing\",\n", - " \"role\": \"assistant\",\n", - " \"logprobs\": -8.9481967812\n", - " }\n", - " }\n", - " ],\n", - " \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n", - " \"created\": 1695871068.8413374,\n", - " \"model\": \"glaiveai/glaive-coder-7b\",\n", - " \"usage\": {\n", - " \"prompt_tokens\": 6,\n", - " \"completion_tokens\": 18,\n", - " \"total_tokens\": 24\n", - " }\n", - "}\n" - ] - } - ], - "source": [ - "import os\n", - "import litellm\n", - "\n", - "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", - "\n", - "# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n", - "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", - "# set api base to your deployed api endpoint from hugging face\n", - "response = litellm.completion(\n", - " model=\"huggingface/glaiveai/glaive-coder-7b\",\n", - " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", - " api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "# Call DeepSeek-R1 model through Together AI\n", + "response = completion(\n", + " model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n", + " messages=[{\"content\": \"How many r's are in the word `strawberry`?\", \"role\": \"user\"}],\n", ")\n", "print(response)" ] @@ -162,13 +69,12 @@ "id": "EU0UubrKzTFe" }, "source": [ - "## HuggingFace - Streaming (Serveless or Dedicated)\n", - "Set stream = True" + "## Streaming\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -176,74 +82,147 @@ "id": "y-QfIvA-uJKX", "outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='I', role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'m\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' just', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' a', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' computer', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - 
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' program', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' so', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' don', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'t\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' have', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' feelings', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' but', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', 
choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' thank', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' for', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' asking', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='!', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' How', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' can', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' assist', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' today', 
role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='?', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='<|eot_id|>', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n", - "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", - "import litellm\n", + "from litellm import completion\n", "\n", - "# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n", - "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", + "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n", "\n", - "# Call https://huggingface.co/glaiveai/glaive-coder-7b\n", - "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", - "# set api base to your deployed api endpoint from hugging face\n", - "response = litellm.completion(\n", - " model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", - " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", - " stream=True\n", + "response = completion(\n", + " model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"How many r's are in the word `strawberry`?\",\n", + " \n", + " }\n", + " ],\n", + " stream=True,\n", ")\n", "\n", - "print(response)\n", - "\n", "for chunk in response:\n", - " print(chunk)" + " print(chunk)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## With images as input\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "CKXAnK55zQRl" - }, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from litellm import completion\n", + "\n", + "# Set your Hugging Face Token\n", + "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n", + "\n", + "messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"text\", \"text\": \"What's in this image?\"},\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\",\n", + " },\n", + " },\n", + " ],\n", + " }\n", + "]\n", + "\n", + "response = completion(\n", + " model=\"huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct\",\n", + " messages=messages,\n", + ")\n", + "print(response.choices[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Tools - Function Calling\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from litellm import completion\n", + "\n", + "\n", + "# Set your Hugging Face Token\n", + "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n", + "\n", + "tools = [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city and state, e.g. San Francisco, CA\",\n", + " },\n", + " \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n", + " },\n", + " \"required\": [\"location\"],\n", + " },\n", + " },\n", + " }\n", + "]\n", + "messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n", + "\n", + "response = completion(\n", + " model=\"huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct\", messages=messages, tools=tools, tool_choice=\"auto\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hugging Face Dedicated Inference Endpoints\n", + "\n", + "Steps to use\n", + "\n", + "- Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n", + "- Set `api_base` to your deployed api base\n", + "- set the model to `huggingface/tgi` so that litellm knows it's a huggingface Deployed Inference Endpoint.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import litellm\n", + "\n", + "\n", + "response = litellm.completion(\n", + " model=\"huggingface/tgi\",\n", + " messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}],\n", + " api_base=\"https://my-endpoint.endpoints.huggingface.cloud/v1/\",\n", + ")\n", + "print(response)" + ] } ], "metadata": { @@ -251,7 +230,8 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv", + "language": "python", "name": "python3" }, "language_info": { @@ -264,7 +244,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/docs/my-website/docs/completion/document_understanding.md b/docs/my-website/docs/completion/document_understanding.md index c101aa1aef..f58b836c63 100644 --- a/docs/my-website/docs/completion/document_understanding.md +++ b/docs/my-website/docs/completion/document_understanding.md @@ -27,16 +27,18 @@ os.environ["AWS_REGION_NAME"] = "" # pdf url -image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" +file_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" # model model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0" -image_content = [ +file_content = [ {"type": "text", "text": "What's this file about?"}, { - "type": "image_url", - "image_url": image_url, # OR {"url": image_url} + "type": "file", + "file": { + "file_id": file_url, + } }, ] @@ -46,7 +48,7 @@ if not supports_pdf_input(model, None): response = completion( model=model, - messages=[{"role": "user", "content": image_content}], + messages=[{"role": "user", "content": file_content}], ) assert response is not None ``` @@ -80,11 +82,15 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \ -d '{ 
"model": "bedrock-model", "messages": [ - {"role": "user", "content": {"type": "text", "text": "What's this file about?"}}, - { - "type": "image_url", - "image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", - } + {"role": "user", "content": [ + {"type": "text", "text": "What's this file about?"}, + { + "type": "file", + "file": { + "file_id": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", + } + } + ]}, ] }' ``` @@ -116,11 +122,13 @@ base64_url = f"data:application/pdf;base64,{encoded_file}" # model model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0" -image_content = [ +file_content = [ {"type": "text", "text": "What's this file about?"}, { - "type": "image_url", - "image_url": base64_url, # OR {"url": base64_url} + "type": "file", + "file": { + "file_data": base64_url, + } }, ] @@ -130,11 +138,53 @@ if not supports_pdf_input(model, None): response = completion( model=model, - messages=[{"role": "user", "content": image_content}], + messages=[{"role": "user", "content": file_content}], ) assert response is not None ``` + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: bedrock-model + litellm_params: + model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 + aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID + aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY + aws_region_name: os.environ/AWS_REGION_NAME +``` + +2. Start the proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "bedrock-model", + "messages": [ + {"role": "user", "content": [ + {"type": "text", "text": "What's this file about?"}, + { + "type": "file", + "file": { + "file_data": "data:application/pdf;base64...", + } + } + ]}, + ] +}' +``` + ## Checking if a model supports pdf input @@ -200,92 +250,3 @@ Expected Response - - -## OpenAI 'file' message type - -This is currently only supported for OpenAI models. - -This will be supported for all providers soon. - - - - -```python -import base64 -from litellm import completion - -with open("draconomicon.pdf", "rb") as f: - data = f.read() - -base64_string = base64.b64encode(data).decode("utf-8") - -completion = completion( - model="gpt-4o", - messages=[ - { - "role": "user", - "content": [ - { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": f"data:application/pdf;base64,{base64_string}", - } - }, - { - "type": "text", - "text": "What is the first dragon in the book?", - } - ], - }, - ], -) - -print(completion.choices[0].message.content) -``` - - - - - -1. Setup config.yaml - -```yaml -model_list: - - model_name: openai-model - litellm_params: - model: gpt-4o - api_key: os.environ/OPENAI_API_KEY -``` - -2. Start the proxy - -```bash -litellm --config config.yaml -``` - -3. Test it! 
- -```bash -curl -X POST 'http://0.0.0.0:4000/chat/completions' \ --H 'Content-Type: application/json' \ --H 'Authorization: Bearer sk-1234' \ --d '{ - "model": "openai-model", - "messages": [ - {"role": "user", "content": [ - { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": f"data:application/pdf;base64,{base64_string}", - } - } - ]} - ] -}' -``` - - - \ No newline at end of file diff --git a/docs/my-website/docs/completion/drop_params.md b/docs/my-website/docs/completion/drop_params.md index e79a88e14b..590d9a4595 100644 --- a/docs/my-website/docs/completion/drop_params.md +++ b/docs/my-website/docs/completion/drop_params.md @@ -107,4 +107,76 @@ response = litellm.completion( -**additional_drop_params**: List or null - Is a list of openai params you want to drop when making a call to the model. \ No newline at end of file +**additional_drop_params**: List or null - Is a list of openai params you want to drop when making a call to the model. + +## Specify allowed openai params in a request + +Tell litellm to allow specific openai params in a request. Use this if you get a `litellm.UnsupportedParamsError` and want to allow a param. LiteLLM will pass the param as is to the model. + + + + + + +In this example we pass `allowed_openai_params=["tools"]` to allow the `tools` param. + +```python showLineNumbers title="Pass allowed_openai_params to LiteLLM Python SDK" +await litellm.acompletion( + model="azure/o_series/", + api_key="xxxxx", + api_base=api_base, + messages=[{"role": "user", "content": "Hello! return a json object"}], + tools=[{"type": "function", "function": {"name": "get_current_time", "description": "Get the current time in a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name, e.g. San Francisco"}}, "required": ["location"]}}}] + allowed_openai_params=["tools"], +) +``` + + + +When using litellm proxy you can pass `allowed_openai_params` in two ways: + +1. Dynamically pass `allowed_openai_params` in a request +2. Set `allowed_openai_params` on the config.yaml file for a specific model + +#### Dynamically pass allowed_openai_params in a request +In this example we pass `allowed_openai_params=["tools"]` to allow the `tools` param for a request sent to the model set on the proxy. + +```python showLineNumbers title="Dynamically pass allowed_openai_params in a request" +import openai +from openai import AsyncAzureOpenAI + +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ], + extra_body={ + "allowed_openai_params": ["tools"] + } +) +``` + +#### Set allowed_openai_params on config.yaml + +You can also set `allowed_openai_params` on the config.yaml file for a specific model. This means that all requests to this deployment are allowed to pass in the `tools` param. 
+ +```yaml showLineNumbers title="Set allowed_openai_params on config.yaml" +model_list: + - model_name: azure-o1-preview + litellm_params: + model: azure/o_series/ + api_key: xxxxx + api_base: https://openai-prod-test.openai.azure.com/openai/deployments/o1/chat/completions?api-version=2025-01-01-preview + allowed_openai_params: ["tools"] +``` + + \ No newline at end of file diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md index 2cfb92546a..dfb7761822 100644 --- a/docs/my-website/docs/providers/azure.md +++ b/docs/my-website/docs/providers/azure.md @@ -1076,32 +1076,24 @@ print(response) ``` -### Parallel Function calling +### Tool Calling / Function Calling + See a detailed walthrough of parallel function calling with litellm [here](https://docs.litellm.ai/docs/completion/function_call) + + + + + ```python # set Azure env variables import os +import litellm +import json + os.environ['AZURE_API_KEY'] = "" # litellm reads AZURE_API_KEY from .env and sends the request os.environ['AZURE_API_BASE'] = "https://openai-gpt-4-test-v-1.openai.azure.com/" os.environ['AZURE_API_VERSION'] = "2023-07-01-preview" -import litellm -import json -# Example dummy function hard coded to return the same weather -# In production, this could be your backend API or an external API -def get_current_weather(location, unit="fahrenheit"): - """Get the current weather in a given location""" - if "tokyo" in location.lower(): - return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"}) - elif "san francisco" in location.lower(): - return json.dumps({"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}) - elif "paris" in location.lower(): - return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"}) - else: - return json.dumps({"location": location, "temperature": "unknown"}) - -## Step 1: send the conversation and available functions to the model -messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}] tools = [ { "type": "function", @@ -1125,7 +1117,7 @@ tools = [ response = litellm.completion( model="azure/chatgpt-functioncalling", # model = azure/ - messages=messages, + messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}], tools=tools, tool_choice="auto", # auto is default, but we'll be explicit ) @@ -1134,8 +1126,49 @@ response_message = response.choices[0].message tool_calls = response.choices[0].message.tool_calls print("\nTool Choice:\n", tool_calls) ``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: azure-gpt-3.5 + litellm_params: + model: azure/chatgpt-functioncalling + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: "2023-07-01-preview" +``` + +2. Start proxy + +```bash +litellm --config config.yaml +``` + +3. Test it + +```bash +curl -L -X POST 'http://localhost:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "azure-gpt-3.5", + "messages": [ + { + "role": "user", + "content": "Hey, how'\''s it going? 
Thinking long and hard before replying - what is the meaning of the world and life itself" + } + ] +}' +``` + + + + ### Spend Tracking for Azure OpenAI Models (PROXY) Set base model for cost tracking azure image-gen call diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md index 395a544db4..8631cbfdad 100644 --- a/docs/my-website/docs/providers/databricks.md +++ b/docs/my-website/docs/providers/databricks.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 🆕 Databricks +# Databricks LiteLLM supports all models on Databricks @@ -154,7 +154,205 @@ response = completion( temperature: 0.5 ``` -## Passings Databricks specific params - 'instruction' + +## Usage - Thinking / `reasoning_content` + +LiteLLM translates OpenAI's `reasoning_effort` to Anthropic's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/23051d89dd3611a81617d84277059cd88b2df511/litellm/llms/anthropic/chat/transformation.py#L298) + +| reasoning_effort | thinking | +| ---------------- | -------- | +| "low" | "budget_tokens": 1024 | +| "medium" | "budget_tokens": 2048 | +| "high" | "budget_tokens": 4096 | + + +Known Limitations: +- Support for passing thinking blocks back to Claude [Issue](https://github.com/BerriAI/litellm/issues/9790) + + + + + +```python +from litellm import completion +import os + +# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`) +os.environ["DATABRICKS_API_KEY"] = "databricks key" +os.environ["DATABRICKS_API_BASE"] = "databricks base url" + +resp = completion( + model="databricks/databricks-claude-3-7-sonnet", + messages=[{"role": "user", "content": "What is the capital of France?"}], + reasoning_effort="low", +) + +``` + + + + + +1. Setup config.yaml + +```yaml +- model_name: claude-3-7-sonnet + litellm_params: + model: databricks/databricks-claude-3-7-sonnet + api_key: os.environ/DATABRICKS_API_KEY + api_base: os.environ/DATABRICKS_API_BASE +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "claude-3-7-sonnet", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "reasoning_effort": "low" + }' +``` + + + + + +**Expected Response** + +```python +ModelResponse( + id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e', + created=1740470510, + model='claude-3-7-sonnet-20250219', + object='chat.completion', + system_fingerprint=None, + choices=[ + Choices( + finish_reason='stop', + index=0, + message=Message( + content="The capital of France is Paris.", + role='assistant', + tool_calls=None, + function_call=None, + provider_specific_fields={ + 'citations': None, + 'thinking_blocks': [ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6...' + } + ] + } + ), + thinking_blocks=[ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6AGB...' + } + ], + reasoning_content='The capital of France is Paris. This is a very straightforward factual question.' 
+ ) + ], + usage=Usage( + completion_tokens=68, + prompt_tokens=42, + total_tokens=110, + completion_tokens_details=None, + prompt_tokens_details=PromptTokensDetailsWrapper( + audio_tokens=None, + cached_tokens=0, + text_tokens=None, + image_tokens=None + ), + cache_creation_input_tokens=0, + cache_read_input_tokens=0 + ) +) +``` + +### Pass `thinking` to Anthropic models + +You can also pass the `thinking` parameter to Anthropic models. + + +You can also pass the `thinking` parameter to Anthropic models. + + + + +```python +from litellm import completion +import os + +# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`) +os.environ["DATABRICKS_API_KEY"] = "databricks key" +os.environ["DATABRICKS_API_BASE"] = "databricks base url" + +response = litellm.completion( + model="databricks/databricks-claude-3-7-sonnet", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, +) +``` + + + + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "databricks/databricks-claude-3-7-sonnet", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "thinking": {"type": "enabled", "budget_tokens": 1024} + }' +``` + + + + + + + + +## Supported Databricks Chat Completion Models + +:::tip + +**We support ALL Databricks models, just set `model=databricks/` as a prefix when sending litellm requests** + +::: + + +| Model Name | Command | +|----------------------------|------------------------------------------------------------------| +| databricks/databricks-claude-3-7-sonnet | `completion(model='databricks/databricks/databricks-claude-3-7-sonnet', messages=messages)` | +| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` | +| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` | +| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` | +| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` | +| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` | +| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` | +| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` | +| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` | + + +## Embedding Models + +### Passing Databricks specific params - 'instruction' For embedding models, databricks lets you pass in an additional param 'instruction'. 
[Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164) @@ -187,27 +385,6 @@ response = litellm.embedding( instruction: "Represent this sentence for searching relevant passages:" ``` - -## Supported Databricks Chat Completion Models - -:::tip - -**We support ALL Databricks models, just set `model=databricks/` as a prefix when sending litellm requests** - -::: - - -| Model Name | Command | -|----------------------------|------------------------------------------------------------------| -| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` | -| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` | -| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` | -| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` | -| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` | -| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` | -| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` | -| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` | - ## Supported Databricks Embedding Models :::tip diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md index 67eca21c65..42783286f1 100644 --- a/docs/my-website/docs/providers/gemini.md +++ b/docs/my-website/docs/providers/gemini.md @@ -887,3 +887,54 @@ response = await client.chat.completions.create( + +## Image Generation + + + + +```python +from litellm import completion + +response = completion( + model="gemini/gemini-2.0-flash-exp-image-generation", + messages=[{"role": "user", "content": "Generate an image of a cat"}], + modalities=["image", "text"], +) +assert response.choices[0].message.content is not None # "data:image/png;base64,e4rr.." +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: gemini-2.0-flash-exp-image-generation + litellm_params: + model: gemini/gemini-2.0-flash-exp-image-generation + api_key: os.environ/GEMINI_API_KEY +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -L -X POST 'http://localhost:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gemini-2.0-flash-exp-image-generation", + "messages": [{"role": "user", "content": "Generate an image of a cat"}], + "modalities": ["image", "text"] +}' +``` + + + + diff --git a/docs/my-website/docs/providers/google_ai_studio/files.md b/docs/my-website/docs/providers/google_ai_studio/files.md new file mode 100644 index 0000000000..500f1d5718 --- /dev/null +++ b/docs/my-website/docs/providers/google_ai_studio/files.md @@ -0,0 +1,161 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# [BETA] Google AI Studio (Gemini) Files API + +Use this to upload files to Google AI Studio (Gemini). + +Useful to pass in large media files to Gemini's `/generateContent` endpoint. 
+ +| Action | Supported | +|----------|-----------| +| `create` | Yes | +| `delete` | No | +| `retrieve` | No | +| `list` | No | + +## Usage + + + + +```python +import base64 +import requests +from litellm import completion, create_file +import os + + +### UPLOAD FILE ### + +# Fetch the audio file and convert it to a base64 encoded string +url = "https://cdn.openai.com/API/docs/audio/alloy.wav" +response = requests.get(url) +response.raise_for_status() +wav_data = response.content +encoded_string = base64.b64encode(wav_data).decode('utf-8') + + +file = create_file( + file=wav_data, + purpose="user_data", + extra_body={"custom_llm_provider": "gemini"}, + api_key=os.getenv("GEMINI_API_KEY"), +) + +print(f"file: {file}") + +assert file is not None + + +### GENERATE CONTENT ### +completion = completion( + model="gemini-2.0-flash", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this recording?" + }, + { + "type": "file", + "file": { + "file_id": file.id, + "filename": "my-test-name", + "format": "audio/wav" + } + } + ] + }, + ] +) + +print(completion.choices[0].message) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: "gemini-2.0-flash" + litellm_params: + model: gemini/gemini-2.0-flash + api_key: os.environ/GEMINI_API_KEY +``` + +2. Start proxy + +```bash +litellm --config config.yaml +``` + +3. Test it + +```python +import base64 +import requests +from openai import OpenAI + +client = OpenAI( + base_url="http://0.0.0.0:4000", + api_key="sk-1234" +) + +# Fetch the audio file and convert it to a base64 encoded string +url = "https://cdn.openai.com/API/docs/audio/alloy.wav" +response = requests.get(url) +response.raise_for_status() +wav_data = response.content +encoded_string = base64.b64encode(wav_data).decode('utf-8') + + +file = client.files.create( + file=wav_data, + purpose="user_data", + extra_body={"target_model_names": "gemini-2.0-flash"} +) + +print(f"file: {file}") + +assert file is not None + +completion = client.chat.completions.create( + model="gemini-2.0-flash", + modalities=["text", "audio"], + audio={"voice": "alloy", "format": "wav"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this recording?" + }, + { + "type": "file", + "file": { + "file_id": file.id, + "filename": "my-test-name", + "format": "audio/wav" + } + } + ] + }, + ], + extra_body={"drop_params": True} +) + +print(completion.choices[0].message) +``` + + + + + + + diff --git a/docs/my-website/docs/providers/huggingface.md b/docs/my-website/docs/providers/huggingface.md index 5297a688ba..399d49b5f4 100644 --- a/docs/my-website/docs/providers/huggingface.md +++ b/docs/my-website/docs/providers/huggingface.md @@ -2,466 +2,392 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Huggingface +# Hugging Face +LiteLLM supports running inference across multiple services for models hosted on the Hugging Face Hub. -LiteLLM supports the following types of Hugging Face models: +- **Serverless Inference Providers** - Hugging Face offers an easy and unified access to serverless AI inference through multiple inference providers, like [Together AI](https://together.ai) and [Sambanova](https://sambanova.ai). This is the fastest way to integrate AI in your products with a maintenance-free and scalable solution. More details in the [Inference Providers documentation](https://huggingface.co/docs/inference-providers/index). 
+- **Dedicated Inference Endpoints** - which is a product to easily deploy models to production. Inference is run by Hugging Face in a dedicated, fully managed infrastructure on a cloud provider of your choice. You can deploy your model on Hugging Face Inference Endpoints by following [these steps](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint). -- Serverless Inference API (free) - loaded and ready to use: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation -- Dedicated Inference Endpoints (paid) - manual deployment: https://ui.endpoints.huggingface.co/ -- All LLMs served via Hugging Face's Inference use [Text-generation-inference](https://huggingface.co/docs/text-generation-inference). + +## Supported Models + +### Serverless Inference Providers +You can check available models for an inference provider by going to [huggingface.co/models](https://huggingface.co/models), clicking the "Other" filter tab, and selecting your desired provider: + +![Filter models by Inference Provider](../../img/hf_filter_inference_providers.png) + +For example, you can find all Fireworks supported models [here](https://huggingface.co/models?inference_provider=fireworks-ai&sort=trending). + + +### Dedicated Inference Endpoints +Refer to the [Inference Endpoints catalog](https://endpoints.huggingface.co/catalog) for a list of available models. ## Usage + + + +### Authentication +With a single Hugging Face token, you can access inference through multiple providers. Your calls are routed through Hugging Face and the usage is billed directly to your Hugging Face account at the standard provider API rates. + +Simply set the `HF_TOKEN` environment variable with your Hugging Face token, you can create one here: https://huggingface.co/settings/tokens. + +```bash +export HF_TOKEN="hf_xxxxxx" +``` +or alternatively, you can pass your Hugging Face token as a parameter: +```python +completion(..., api_key="hf_xxxxxx") +``` + +### Getting Started + +To use a Hugging Face model, specify both the provider and model you want to use in the following format: +``` +huggingface/// +``` +Where `/` is the Hugging Face model ID and `` is the inference provider. +By default, if you don't specify a provider, LiteLLM will use the [HF Inference API](https://huggingface.co/docs/api-inference/en/index). + +Examples: + +```python +# Run DeepSeek-R1 inference through Together AI +completion(model="huggingface/together/deepseek-ai/DeepSeek-R1",...) + +# Run Qwen2.5-72B-Instruct inference through Sambanova +completion(model="huggingface/sambanova/Qwen/Qwen2.5-72B-Instruct",...) + +# Run Llama-3.3-70B-Instruct inference through HF Inference API +completion(model="huggingface/meta-llama/Llama-3.3-70B-Instruct",...) +``` + + Open In Colab -You need to tell LiteLLM when you're calling Huggingface. -This is done by adding the "huggingface/" prefix to `model`, example `completion(model="huggingface/",...)`. - - - - -By default, LiteLLM will assume a Hugging Face call follows the [Messages API](https://huggingface.co/docs/text-generation-inference/messages_api), which is fully compatible with the OpenAI Chat Completion API. 
- - - +### Basic Completion +Here's an example of chat completion using the DeepSeek-R1 model through Together AI: ```python import os from litellm import completion -# [OPTIONAL] set env var -os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key" +os.environ["HF_TOKEN"] = "hf_xxxxxx" -messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}] - -# e.g. Call 'https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct' from Serverless Inference API response = completion( - model="huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct", - messages=[{ "content": "Hello, how are you?","role": "user"}], + model="huggingface/together/deepseek-ai/DeepSeek-R1", + messages=[ + { + "role": "user", + "content": "How many r's are in the word 'strawberry'?", + } + ], +) +print(response) +``` + +### Streaming +Now, let's see what a streaming request looks like. + +```python +import os +from litellm import completion + +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +response = completion( + model="huggingface/together/deepseek-ai/DeepSeek-R1", + messages=[ + { + "role": "user", + "content": "How many r's are in the word `strawberry`?", + + } + ], + stream=True, +) + +for chunk in response: + print(chunk) +``` + +### Image Input +You can also pass images when the model supports it. Here is an example using [Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) model through Sambanova. + +```python +from litellm import completion + +# Set your Hugging Face Token +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + } + }, + ], + } + ] + +response = completion( + model="huggingface/sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, +) +print(response.choices[0]) +``` + +### Function Calling +You can extend the model's capabilities by giving them access to tools. Here is an example with function calling using [Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct) model through Sambanova. + +```python +import os +from litellm import completion + +# Set your Hugging Face Token +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + } + } +] +messages = [ + { + "role": "user", + "content": "What's the weather like in Boston today?", + } +] + +response = completion( + model="huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct", + messages=messages, + tools=tools, + tool_choice="auto" +) +print(response) +``` + + + + + + + Open In Colab + + +### Basic Completion +After you have [deployed your Hugging Face Inference Endpoint](https://endpoints.huggingface.co/new) on dedicated infrastructure, you can run inference on it by providing the endpoint base URL in `api_base`, and indicating `huggingface/tgi` as the model name. 
+ +```python +import os +from litellm import completion + +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +response = completion( + model="huggingface/tgi", + messages=[{"content": "Hello, how are you?", "role": "user"}], + api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/" +) +print(response) +``` + +### Streaming + +```python +import os +from litellm import completion + +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +response = completion( + model="huggingface/tgi", + messages=[{"content": "Hello, how are you?", "role": "user"}], + api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/", stream=True ) -print(response) -``` - - - - -1. Add models to your config.yaml - -```yaml -model_list: - - model_name: llama-3.1-8B-instruct - litellm_params: - model: huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct - api_key: os.environ/HUGGINGFACE_API_KEY -``` - -2. Start the proxy - -```bash -$ litellm --config /path/to/config.yaml --debug -``` - -3. Test it! - -```shell -curl --location 'http://0.0.0.0:4000/chat/completions' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "model": "llama-3.1-8B-instruct", - "messages": [ - { - "role": "user", - "content": "I like you!" - } - ], -}' -``` - - - - - - -Append `text-classification` to the model name - -e.g. `huggingface/text-classification/` - - - - -```python -import os -from litellm import completion - -# [OPTIONAL] set env var -os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key" - -messages = [{ "content": "I like you, I love you!","role": "user"}] - -# e.g. Call 'shahrukhx01/question-vs-statement-classifier' hosted on HF Inference endpoints -response = completion( - model="huggingface/text-classification/shahrukhx01/question-vs-statement-classifier", - messages=messages, - api_base="https://my-endpoint.endpoints.huggingface.cloud", -) - -print(response) -``` - - - - -1. Add models to your config.yaml - -```yaml -model_list: - - model_name: bert-classifier - litellm_params: - model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier - api_key: os.environ/HUGGINGFACE_API_KEY - api_base: "https://my-endpoint.endpoints.huggingface.cloud" -``` - -2. Start the proxy - -```bash -$ litellm --config /path/to/config.yaml --debug -``` - -3. Test it! - -```shell -curl --location 'http://0.0.0.0:4000/chat/completions' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "model": "bert-classifier", - "messages": [ - { - "role": "user", - "content": "I like you!" - } - ], -}' -``` - - - - - - -Steps to use -* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/ -* Set `api_base` to your deployed api base -* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint - - - - -```python -import os -from litellm import completion - -os.environ["HUGGINGFACE_API_KEY"] = "" - -# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b -# add the 'huggingface/' prefix to the model to set huggingface as the provider -# set api base to your deployed api endpoint from hugging face -response = completion( - model="huggingface/glaiveai/glaive-coder-7b", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud" -) -print(response) -``` - - - - -1. 
Add models to your config.yaml - -```yaml -model_list: - - model_name: glaive-coder - litellm_params: - model: huggingface/glaiveai/glaive-coder-7b - api_key: os.environ/HUGGINGFACE_API_KEY - api_base: "https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud" -``` - -2. Start the proxy - -```bash -$ litellm --config /path/to/config.yaml --debug -``` - -3. Test it! - -```shell -curl --location 'http://0.0.0.0:4000/chat/completions' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "model": "glaive-coder", - "messages": [ - { - "role": "user", - "content": "I like you!" - } - ], -}' -``` - - - - - - - -## Streaming - - - Open In Colab - - -You need to tell LiteLLM when you're calling Huggingface. -This is done by adding the "huggingface/" prefix to `model`, example `completion(model="huggingface/",...)`. - -```python -import os -from litellm import completion - -# [OPTIONAL] set env var -os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key" - -messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}] - -# e.g. Call 'facebook/blenderbot-400M-distill' hosted on HF Inference endpoints -response = completion( - model="huggingface/facebook/blenderbot-400M-distill", - messages=messages, - api_base="https://my-endpoint.huggingface.cloud", - stream=True -) - -print(response) for chunk in response: - print(chunk) + print(chunk) ``` +### Image Input + +```python +import os +from litellm import completion + +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + } + }, + ], + } + ] +response = completion( + model="huggingface/tgi", + messages=messages, + api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/"" +) +print(response.choices[0]) +``` + +### Function Calling + +```python +import os +from litellm import completion + +os.environ["HF_TOKEN"] = "hf_xxxxxx" + +functions = [{ + "name": "get_weather", + "description": "Get the weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get weather for" + } + }, + "required": ["location"] + } +}] + +response = completion( + model="huggingface/tgi", + messages=[{"content": "What's the weather like in San Francisco?", "role": "user"}], + api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/", + functions=functions +) +print(response) +``` + + + + +## LiteLLM Proxy Server with Hugging Face models +You can set up a [LiteLLM Proxy Server](https://docs.litellm.ai/#litellm-proxy-server-llm-gateway) to serve Hugging Face models through any of the supported Inference Providers. Here's how to do it: + +### Step 1. Setup the config file + +In this case, we are configuring a proxy to serve `DeepSeek R1` from Hugging Face, using Together AI as the backend Inference Provider. + +```yaml +model_list: + - model_name: my-r1-model + litellm_params: + model: huggingface/together/deepseek-ai/DeepSeek-R1 + api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env +``` + +### Step 2. Start the server +```bash +litellm --config /path/to/config.yaml +``` + +### Step 3. 
Make a request to the server + + + +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "my-r1-model", + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + } + ] +}' +``` + + + + +```python +# pip install openai +from openai import OpenAI + +client = OpenAI( + base_url="http://0.0.0.0:4000", + api_key="anything", +) + +response = client.chat.completions.create( + model="my-r1-model", + messages=[ + {"role": "user", "content": "Hello, how are you?"} + ] +) +print(response) +``` + + + + + ## Embedding -LiteLLM supports Hugging Face's [text-embedding-inference](https://github.com/huggingface/text-embeddings-inference) format. +LiteLLM supports Hugging Face's [text-embedding-inference](https://github.com/huggingface/text-embeddings-inference) models as well. ```python from litellm import embedding import os -os.environ['HUGGINGFACE_API_KEY'] = "" +os.environ['HF_TOKEN'] = "hf_xxxxxx" response = embedding( model='huggingface/microsoft/codebert-base', input=["good morning from litellm"] ) ``` -## Advanced - -### Setting API KEYS + API BASE - -If required, you can set the api key + api base, set it in your os environment. [Code for how it's sent](https://github.com/BerriAI/litellm/blob/0100ab2382a0e720c7978fbf662cc6e6920e7e03/litellm/llms/huggingface_restapi.py#L25) - -```python -import os -os.environ["HUGGINGFACE_API_KEY"] = "" -os.environ["HUGGINGFACE_API_BASE"] = "" -``` - -### Viewing Log probs - -#### Using `decoder_input_details` - OpenAI `echo` - -The `echo` param is supported by OpenAI Completions - Use `litellm.text_completion()` for this - -```python -from litellm import text_completion -response = text_completion( - model="huggingface/bigcode/starcoder", - prompt="good morning", - max_tokens=10, logprobs=10, - echo=True -) -``` - -#### Output - -```json -{ - "id": "chatcmpl-3fc71792-c442-4ba1-a611-19dd0ac371ad", - "object": "text_completion", - "created": 1698801125.936519, - "model": "bigcode/starcoder", - "choices": [ - { - "text": ", I'm going to make you a sand", - "index": 0, - "logprobs": { - "tokens": [ - "good", - " morning", - ",", - " I", - "'m", - " going", - " to", - " make", - " you", - " a", - " s", - "and" - ], - "token_logprobs": [ - "None", - -14.96875, - -2.2285156, - -2.734375, - -2.0957031, - -2.0917969, - -0.09429932, - -3.1132812, - -1.3203125, - -1.2304688, - -1.6201172, - -0.010292053 - ] - }, - "finish_reason": "length" - } - ], - "usage": { - "completion_tokens": 9, - "prompt_tokens": 2, - "total_tokens": 11 - } -} -``` - -### Models with Prompt Formatting - -For models with special prompt templates (e.g. Llama2), we format the prompt to fit their template. 
- -#### Models with natively Supported Prompt Templates - -| Model Name | Works for Models | Function Call | Required OS Variables | -| ------------------------------------ | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | -| mistralai/Mistral-7B-Instruct-v0.1 | mistralai/Mistral-7B-Instruct-v0.1 | `completion(model='huggingface/mistralai/Mistral-7B-Instruct-v0.1', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='huggingface/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='huggingface/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='huggingface/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='huggingface/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='huggingface/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | -| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='huggingface/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` | - -**What if we don't support a model you need?** -You can also specify you're own custom prompt formatting, in case we don't have your model covered yet. - -**Does this mean you have to specify a prompt for all models?** -No. By default we'll concatenate your message content to make a prompt. 
- -**Default Prompt Template** - -```python -def default_pt(messages): - return " ".join(message["content"] for message in messages) -``` - -[Code for how prompt formats work in LiteLLM](https://github.com/BerriAI/litellm/blob/main/litellm/llms/prompt_templates/factory.py) - -#### Custom prompt templates - -```python -import litellm - -# Create your own custom prompt template works -litellm.register_prompt_template( - model="togethercomputer/LLaMA-2-7B-32K", - roles={ - "system": { - "pre_message": "[INST] <>\n", - "post_message": "\n<>\n [/INST]\n" - }, - "user": { - "pre_message": "[INST] ", - "post_message": " [/INST]\n" - }, - "assistant": { - "post_message": "\n" - } - } - ) - -def test_huggingface_custom_model(): - model = "huggingface/togethercomputer/LLaMA-2-7B-32K" - response = completion(model=model, messages=messages, api_base="https://ecd4sb5n09bo4ei2.us-east-1.aws.endpoints.huggingface.cloud") - print(response['choices'][0]['message']['content']) - return response - -test_huggingface_custom_model() -``` - -[Implementation Code](https://github.com/BerriAI/litellm/blob/c0b3da2c14c791a0b755f0b1e5a9ef065951ecbf/litellm/llms/huggingface_restapi.py#L52) - -### Deploying a model on huggingface - -You can use any chat/text model from Hugging Face with the following steps: - -- Copy your model id/url from Huggingface Inference Endpoints - - [ ] Go to https://ui.endpoints.huggingface.co/ - - [ ] Copy the url of the specific model you'd like to use - HF_Dashboard -- Set it as your model name -- Set your HUGGINGFACE_API_KEY as an environment variable - -Need help deploying a model on huggingface? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint) - -# output - -Same as the OpenAI format, but also includes logprobs. [See the code](https://github.com/BerriAI/litellm/blob/b4b2dbf005142e0a483d46a07a88a19814899403/litellm/llms/huggingface_restapi.py#L115) - -```json -{ - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": "\ud83d\ude31\n\nComment: @SarahSzabo I'm", - "role": "assistant", - "logprobs": -22.697942825499993 - } - } - ], - "created": 1693436637.38206, - "model": "https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", - "usage": { - "prompt_tokens": 14, - "completion_tokens": 11, - "total_tokens": 25 - } -} -``` - # FAQ -**Does this support stop sequences?** +**How does billing work with Hugging Face Inference Providers?** -Yes, we support stop sequences - and you can pass as many as allowed by Hugging Face (or any provider!) +> Billing is centralized on your Hugging Face account, no matter which providers you are using. You are billed the standard provider API rates with no additional markup - Hugging Face simply passes through the provider costs. Note that [Hugging Face PRO](https://huggingface.co/subscribe/pro) users get $2 worth of Inference credits every month that can be used across providers. -**How do you deal with repetition penalty?** +**Do I need to create an account for each Inference Provider?** -We map the presence penalty parameter in openai to the repetition penalty parameter on Hugging Face. [See code](https://github.com/BerriAI/litellm/blob/b4b2dbf005142e0a483d46a07a88a19814899403/litellm/utils.py#L757). +> No, you don't need to create separate accounts. All requests are routed through Hugging Face, so you only need your HF token. This allows you to easily benchmark different providers and choose the one that best fits your needs. 
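+A minimal sketch of what this looks like in practice, using one `HF_TOKEN` to benchmark the same model across two provider routes (the `together` and `sambanova` routes below are illustrative - swap in any provider that actually serves the model you want to compare):
+
+```python
+import os
+import time
+from litellm import completion
+
+# A single Hugging Face token works for every provider route
+os.environ["HF_TOKEN"] = "hf_xxxxxx"
+
+messages = [{"role": "user", "content": "Hello, how are you?"}]
+
+for model in [
+    "huggingface/together/deepseek-ai/DeepSeek-R1",
+    "huggingface/sambanova/deepseek-ai/DeepSeek-R1",  # illustrative second provider
+]:
+    start = time.time()
+    response = completion(model=model, messages=messages)
+    print(f"{model}: {time.time() - start:.2f}s")
+    print(response.choices[0].message.content[:200])
+```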
-We welcome any suggestions for improving our Hugging Face integration - Create an [issue](https://github.com/BerriAI/litellm/issues/new/choose)/[Join the Discord](https://discord.com/invite/wuPM9dRgDw)! +**Will more inference providers be supported by Hugging Face in the future?** + +> Yes! New inference providers (and models) are being added gradually. + +We welcome any suggestions for improving our Hugging Face integration - Create an [issue](https://github.com/BerriAI/litellm/issues/new/choose)/[Join the Discord](https://discord.com/invite/wuPM9dRgDw)! \ No newline at end of file diff --git a/docs/my-website/docs/proxy/admin_ui_sso.md b/docs/my-website/docs/proxy/admin_ui_sso.md index 882e3df0b2..0bbba57fd9 100644 --- a/docs/my-website/docs/proxy/admin_ui_sso.md +++ b/docs/my-website/docs/proxy/admin_ui_sso.md @@ -156,7 +156,7 @@ PROXY_LOGOUT_URL="https://www.google.com" Set this in your .env (so the proxy can set the correct redirect url) ```shell -PROXY_BASE_URL=https://litellm-api.up.railway.app/ +PROXY_BASE_URL=https://litellm-api.up.railway.app ``` #### Step 4. Test flow diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 05b3e0be37..455bdda938 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -406,6 +406,7 @@ router_settings: | HELICONE_API_KEY | API key for Helicone service | HOSTNAME | Hostname for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog) | HUGGINGFACE_API_BASE | Base URL for Hugging Face API +| HUGGINGFACE_API_KEY | API key for Hugging Face API | IAM_TOKEN_DB_AUTH | IAM token for database authentication | JSON_LOGS | Enable JSON formatted logging | JWT_AUDIENCE | Expected audience for JWT tokens diff --git a/docs/my-website/docs/proxy/cost_tracking.md b/docs/my-website/docs/proxy/cost_tracking.md index 7f90273c39..784de1da81 100644 --- a/docs/my-website/docs/proxy/cost_tracking.md +++ b/docs/my-website/docs/proxy/cost_tracking.md @@ -6,6 +6,8 @@ import Image from '@theme/IdealImage'; Track spend for keys, users, and teams across 100+ LLMs. +LiteLLM automatically tracks spend for all known models. 
See our [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) + ### How to Track Spend with LiteLLM **Step 1** @@ -35,10 +37,10 @@ response = client.chat.completions.create( "content": "this is a test request, write a short poem" } ], - user="palantir", - extra_body={ + user="palantir", # OPTIONAL: pass user to track spend by user + extra_body={ "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } } ) @@ -63,9 +65,9 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ "content": "what llm are you" } ], - "user": "palantir", + "user": "palantir", # OPTIONAL: pass user to track spend by user "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } }' ``` @@ -90,7 +92,7 @@ chat = ChatOpenAI( user="palantir", extra_body={ "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } } ) @@ -150,8 +152,134 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin -## ✨ (Enterprise) API Endpoints to get Spend -### Getting Spend Reports - To Charge Other Teams, Customers, Users +### Allowing Non-Proxy Admins to access `/spend` endpoints + +Use this when you want non-proxy admins to access `/spend` endpoints + +:::info + +Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: + +##### Create Key +Create Key with with `permissions={"get_spend_routes": true}` +```shell +curl --location 'http://0.0.0.0:4000/key/generate' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "permissions": {"get_spend_routes": true} + }' +``` + +##### Use generated key on `/spend` endpoints + +Access spend Routes with newly generate keys +```shell +curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \ + -H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A' +``` + + + +#### Reset Team, API Key Spend - MASTER KEY ONLY + +Use `/global/spend/reset` if you want to: +- Reset the Spend for all API Keys, Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0` + +- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes + +##### Request +Only the `LITELLM_MASTER_KEY` you set can access this route +```shell +curl -X POST \ + 'http://localhost:4000/global/spend/reset' \ + -H 'Authorization: Bearer sk-1234' \ + -H 'Content-Type: application/json' +``` + +##### Expected Responses + +```shell +{"message":"Spend for all API Keys and Teams reset successfully","status":"success"} +``` + + +## Set 'base_model' for Cost Tracking (e.g. Azure deployments) + +**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. 
This leads to inaccurate cost tracking + +**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost + +Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) + +Example config with `base_model` +```yaml +model_list: + - model_name: azure-gpt-3.5 + litellm_params: + model: azure/chatgpt-v-2 + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: "2023-07-01-preview" + model_info: + base_model: azure/gpt-4-1106-preview +``` + +## Daily Spend Breakdown API + +Retrieve granular daily usage data for a user (by model, provider, and API key) with a single endpoint. + +Example Request: + +```shell title="Daily Spend Breakdown API" showLineNumbers +curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \ +-H 'Authorization: Bearer sk-...' +``` + +```json title="Daily Spend Breakdown API Response" showLineNumbers +{ + "results": [ + { + "date": "2025-03-27", + "metrics": { + "spend": 0.0177072, + "prompt_tokens": 111, + "completion_tokens": 1711, + "total_tokens": 1822, + "api_requests": 11 + }, + "breakdown": { + "models": { + "gpt-4o-mini": { + "spend": 1.095e-05, + "prompt_tokens": 37, + "completion_tokens": 9, + "total_tokens": 46, + "api_requests": 1 + }, + "providers": { "openai": { ... }, "azure_ai": { ... } }, + "api_keys": { "3126b6eaf1...": { ... } } + } + } + ], + "metadata": { + "total_spend": 0.7274667, + "total_prompt_tokens": 280990, + "total_completion_tokens": 376674, + "total_api_requests": 14 + } +} +``` + +### API Reference + +See our [Swagger API](https://litellm-api.up.railway.app/#/Budget%20%26%20Spend%20Tracking/get_user_daily_activity_user_daily_activity_get) for more details on the `/user/daily/activity` endpoint + +## ✨ (Enterprise) Generate Spend Reports + +Use this to charge other teams, customers, users Use the `/global/spend/report` endpoint to get spend reports @@ -470,105 +598,6 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end -### Allowing Non-Proxy Admins to access `/spend` endpoints - -Use this when you want non-proxy admins to access `/spend` endpoints - -:::info - -Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) - -::: - -##### Create Key -Create Key with with `permissions={"get_spend_routes": true}` -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "permissions": {"get_spend_routes": true} - }' -``` - -##### Use generated key on `/spend` endpoints - -Access spend Routes with newly generate keys -```shell -curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \ - -H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A' -``` - - - -#### Reset Team, API Key Spend - MASTER KEY ONLY - -Use `/global/spend/reset` if you want to: -- Reset the Spend for all API Keys, Teams. 
The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0` - -- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes - -##### Request -Only the `LITELLM_MASTER_KEY` you set can access this route -```shell -curl -X POST \ - 'http://localhost:4000/global/spend/reset' \ - -H 'Authorization: Bearer sk-1234' \ - -H 'Content-Type: application/json' -``` - -##### Expected Responses - -```shell -{"message":"Spend for all API Keys and Teams reset successfully","status":"success"} -``` - - - - -## Spend Tracking for Azure OpenAI Models - -Set base model for cost tracking azure image-gen call - -#### Image Generation - -```yaml -model_list: - - model_name: dall-e-3 - litellm_params: - model: azure/dall-e-3-test - api_version: 2023-06-01-preview - api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ - api_key: os.environ/AZURE_API_KEY - base_model: dall-e-3 # 👈 set dall-e-3 as base model - model_info: - mode: image_generation -``` - -#### Chat Completions / Embeddings - -**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking - -**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost - -Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) - -Example config with `base_model` -```yaml -model_list: - - model_name: azure-gpt-3.5 - litellm_params: - model: azure/chatgpt-v-2 - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: "2023-07-01-preview" - model_info: - base_model: azure/gpt-4-1106-preview -``` - -## Custom Input/Output Pricing - -👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to setup custom pricing or your models ## ✨ Custom Spend Log metadata @@ -587,4 +616,5 @@ Logging specific key,value pairs in spend logs metadata is an enterprise feature Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags) -::: \ No newline at end of file +::: + diff --git a/docs/my-website/docs/proxy/db_deadlocks.md b/docs/my-website/docs/proxy/db_deadlocks.md new file mode 100644 index 0000000000..332374995d --- /dev/null +++ b/docs/my-website/docs/proxy/db_deadlocks.md @@ -0,0 +1,86 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# High Availability Setup (Resolve DB Deadlocks) + +Resolve any Database Deadlocks you see in high traffic by using this setup + +## What causes the problem? + +LiteLLM writes `UPDATE` and `UPSERT` queries to the DB. When using 10+ instances of LiteLLM, these queries can cause deadlocks since each instance could simultaneously attempt to update the same `user_id`, `team_id`, `key` etc. + +## How the high availability setup fixes the problem +- All instances will write to a Redis queue instead of the DB. +- A single instance will acquire a lock on the DB and flush the redis queue to the DB. + + +## How it works + +### Stage 1. Each instance writes updates to redis + +Each instance will accumlate the spend updates for a key, user, team, etc and write the updates to a redis queue. + + +
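+A rough illustration of this pattern (the queue name and payload shape here are hypothetical, not LiteLLM's internal code):
+
+```python
+import json
+import redis
+
+r = redis.Redis(host="localhost", port=6379)
+
+def buffer_spend_update(entity_type: str, entity_id: str, spend: float) -> None:
+    # Append a spend delta to a Redis list instead of issuing an UPDATE against the DB
+    update = {"entity_type": entity_type, "entity_id": entity_id, "spend": spend}
+    r.rpush("litellm:spend_update_queue", json.dumps(update))  # queue name is illustrative
+
+# e.g. record $0.002 of spend against a virtual key
+buffer_spend_update("key", "sk-1234", 0.002)
+```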

+Each instance writes updates to redis +

+ + +### Stage 2. A single instance flushes the redis queue to the DB + +A single instance will acquire a lock on the DB and flush all elements in the redis queue to the DB. + +- 1 instance will attempt to acquire the lock for the DB update job +- The status of the lock is stored in redis +- If the instance acquires the lock to write to DB + - It will read all updates from redis + - Aggregate all updates into 1 transaction + - Write updates to DB + - Release the lock +- Note: Only 1 instance can acquire the lock at a time, this limits the number of instances that can write to the DB at once + + + +
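+A simplified sketch of the flush job described above (the lock key, queue name, and DB write below are placeholders, not LiteLLM's internal code):
+
+```python
+import json
+from collections import defaultdict
+
+import redis
+
+r = redis.Redis(host="localhost", port=6379)
+
+def flush_spend_queue_to_db() -> None:
+    # NX + TTL ensures only one instance holds the lock at a time
+    if not r.set("litellm:spend_update_lock", "pod-1", nx=True, ex=60):
+        return  # another instance is already flushing, skip this cycle
+    try:
+        aggregated = defaultdict(float)
+        while (raw := r.lpop("litellm:spend_update_queue")) is not None:
+            update = json.loads(raw)
+            aggregated[update["entity_id"]] += update["spend"]
+        # write_aggregated_spend_to_db(aggregated)  # placeholder: one DB transaction for all queued updates
+    finally:
+        r.delete("litellm:spend_update_lock")  # release the lock
+```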

+A single instance flushes the redis queue to the DB +

+ + +## Usage + +### Required components + +- Redis +- Postgres + +### Setup on LiteLLM config + +You can enable using the redis buffer by setting `use_redis_transaction_buffer: true` in the `general_settings` section of your `proxy_config.yaml` file. + +Note: This setup requires litellm to be connected to a redis instance. + +```yaml showLineNumbers title="litellm proxy_config.yaml" +general_settings: + use_redis_transaction_buffer: true + +litellm_settings: + cache: True + cache_params: + type: redis + supported_call_types: [] # Optional: Set cache for proxy, but not on the actual llm api call +``` + +## Monitoring + +LiteLLM emits the following prometheus metrics to monitor the health/status of the in memory buffer and redis buffer. + + +| Metric Name | Description | Storage Type | +|-----------------------------------------------------|-----------------------------------------------------------------------------|--------------| +| `litellm_pod_lock_manager_size` | Indicates which pod has the lock to write updates to the database. | Redis | +| `litellm_in_memory_daily_spend_update_queue_size` | Number of items in the in-memory daily spend update queue. These are the aggregate spend logs for each user. | In-Memory | +| `litellm_redis_daily_spend_update_queue_size` | Number of items in the Redis daily spend update queue. These are the aggregate spend logs for each user. | Redis | +| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory | +| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis | + diff --git a/docs/my-website/docs/proxy/guardrails/aim_security.md b/docs/my-website/docs/proxy/guardrails/aim_security.md index 3138dc7d12..d76c4e0c1c 100644 --- a/docs/my-website/docs/proxy/guardrails/aim_security.md +++ b/docs/my-website/docs/proxy/guardrails/aim_security.md @@ -140,7 +140,7 @@ The above request should not be blocked, and you should receive a regular LLM re -# Advanced +## Advanced Aim Guard provides user-specific Guardrail policies, enabling you to apply tailored policies to individual users. To utilize this feature, include the end-user's email in the request payload by setting the `x-aim-user-email` header of your request. diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index 1c1cbeedb4..2d09502d52 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -177,6 +177,50 @@ export LITELLM_SALT_KEY="sk-1234" [**See Code**](https://github.com/BerriAI/litellm/blob/036a6821d588bd36d170713dcf5a72791a694178/litellm/proxy/common_utils/encrypt_decrypt_utils.py#L15) + +## 9. Use `prisma migrate deploy` + +Use this to handle db migrations across LiteLLM versions in production + + + + +```bash +USE_PRISMA_MIGRATE="True" +``` + + + + + +```bash +litellm --use_prisma_migrate +``` + + + + +Benefits: + +The migrate deploy command: + +- **Does not** issue a warning if an already applied migration is missing from migration history +- **Does not** detect drift (production database schema differs from migration history end state - for example, due to a hotfix) +- **Does not** reset the database or generate artifacts (such as Prisma Client) +- **Does not** rely on a shadow database + + +### How does LiteLLM handle DB migrations in production? + +1. A new migration file is written to our `litellm-proxy-extras` package. 
[See all](https://github.com/BerriAI/litellm/tree/main/litellm-proxy-extras/litellm_proxy_extras/migrations) + +2. The core litellm pip package is bumped to point to the new `litellm-proxy-extras` package. This ensures, older versions of LiteLLM will continue to use the old migrations. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/pyproject.toml#L58) + +3. When you upgrade to a new version of LiteLLM, the migration file is applied to the database. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/litellm-proxy-extras/litellm_proxy_extras/utils.py#L42) + + + + ## Extras ### Expected Performance in Production diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 8dff527ae5..3666c6f738 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -242,6 +242,19 @@ litellm_settings: | `litellm_redis_fails` | Number of failed redis calls | | `litellm_self_latency` | Histogram latency for successful litellm api call | +#### DB Transaction Queue Health Metrics + +Use these metrics to monitor the health of the DB Transaction Queue. Eg. Monitoring the size of the in-memory and redis buffers. + +| Metric Name | Description | Storage Type | +|-----------------------------------------------------|-----------------------------------------------------------------------------|--------------| +| `litellm_pod_lock_manager_size` | Indicates which pod has the lock to write updates to the database. | Redis | +| `litellm_in_memory_daily_spend_update_queue_size` | Number of items in the in-memory daily spend update queue. These are the aggregate spend logs for each user. | In-Memory | +| `litellm_redis_daily_spend_update_queue_size` | Number of items in the Redis daily spend update queue. These are the aggregate spend logs for each user. | Redis | +| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory | +| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis | + + ## **🔥 LiteLLM Maintained Grafana Dashboards ** @@ -268,6 +281,17 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Das +## Add authentication on /metrics endpoint + +**By default /metrics endpoint is unauthenticated.** + +You can opt into running litellm authentication on the /metrics endpoint by setting the following on the config + +```yaml +litellm_settings: + require_auth_for_metrics_endpoint: true +``` + ## FAQ ### What are `_created` vs. `_total` metrics? 
diff --git a/docs/my-website/img/deadlock_fix_1.png b/docs/my-website/img/deadlock_fix_1.png new file mode 100644 index 0000000000..df651f440c Binary files /dev/null and b/docs/my-website/img/deadlock_fix_1.png differ diff --git a/docs/my-website/img/deadlock_fix_2.png b/docs/my-website/img/deadlock_fix_2.png new file mode 100644 index 0000000000..0f139d84e5 Binary files /dev/null and b/docs/my-website/img/deadlock_fix_2.png differ diff --git a/docs/my-website/img/hf_filter_inference_providers.png b/docs/my-website/img/hf_filter_inference_providers.png new file mode 100644 index 0000000000..d4c7188919 Binary files /dev/null and b/docs/my-website/img/hf_filter_inference_providers.png differ diff --git a/docs/my-website/img/prevent_deadlocks.jpg b/docs/my-website/img/prevent_deadlocks.jpg new file mode 100644 index 0000000000..2807f327d1 Binary files /dev/null and b/docs/my-website/img/prevent_deadlocks.jpg differ diff --git a/docs/my-website/img/release_notes/new_activity_tab.png b/docs/my-website/img/release_notes/new_activity_tab.png new file mode 100644 index 0000000000..e8cea22a90 Binary files /dev/null and b/docs/my-website/img/release_notes/new_activity_tab.png differ diff --git a/docs/my-website/img/release_notes/spend_by_model.jpg b/docs/my-website/img/release_notes/spend_by_model.jpg new file mode 100644 index 0000000000..2584949eff Binary files /dev/null and b/docs/my-website/img/release_notes/spend_by_model.jpg differ diff --git a/docs/my-website/package-lock.json b/docs/my-website/package-lock.json index 6c07e67d91..06251b16bb 100644 --- a/docs/my-website/package-lock.json +++ b/docs/my-website/package-lock.json @@ -12559,9 +12559,10 @@ } }, "node_modules/image-size": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz", - "integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.2.1.tgz", + "integrity": "sha512-rH+46sQJ2dlwfjfhCyNx5thzrv+dtmBIhPHk0zgRUukHzZ/kRueTJXoYYsclBaKcSMBWuGbOFXtioLpzTb5euw==", + "license": "MIT", "dependencies": { "queue": "6.0.2" }, diff --git a/docs/my-website/release_notes/v1.55.10/index.md b/docs/my-website/release_notes/v1.55.10/index.md index 7f9839c2b5..2b5ce75cf0 100644 --- a/docs/my-website/release_notes/v1.55.10/index.md +++ b/docs/my-website/release_notes/v1.55.10/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.55.8-stable/index.md b/docs/my-website/release_notes/v1.55.8-stable/index.md index 7e82e94747..38c78eb537 100644 --- a/docs/my-website/release_notes/v1.55.8-stable/index.md +++ b/docs/my-website/release_notes/v1.55.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.56.1/index.md b/docs/my-website/release_notes/v1.56.1/index.md index 7c4ccc74ea..74f3606b90 100644 --- a/docs/my-website/release_notes/v1.56.1/index.md +++ b/docs/my-website/release_notes/v1.56.1/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.56.3/index.md b/docs/my-website/release_notes/v1.56.3/index.md index 95205633ea..3d996ba5b8 100644 --- a/docs/my-website/release_notes/v1.56.3/index.md +++ b/docs/my-website/release_notes/v1.56.3/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.56.4/index.md b/docs/my-website/release_notes/v1.56.4/index.md index 93f8725632..bf9cc2d94e 100644 --- a/docs/my-website/release_notes/v1.56.4/index.md +++ b/docs/my-website/release_notes/v1.56.4/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.3/index.md b/docs/my-website/release_notes/v1.57.3/index.md index 3bee71a8e1..ab1154a0a8 100644 --- a/docs/my-website/release_notes/v1.57.3/index.md +++ b/docs/my-website/release_notes/v1.57.3/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.7/index.md b/docs/my-website/release_notes/v1.57.7/index.md index ce987baf77..4da2402efa 100644 --- a/docs/my-website/release_notes/v1.57.7/index.md +++ b/docs/my-website/release_notes/v1.57.7/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.8-stable/index.md b/docs/my-website/release_notes/v1.57.8-stable/index.md index d37a7b9ff8..56eb09a20f 100644 --- a/docs/my-website/release_notes/v1.57.8-stable/index.md +++ b/docs/my-website/release_notes/v1.57.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.59.0/index.md b/docs/my-website/release_notes/v1.59.0/index.md index 5343ba49ad..2699e42020 100644 --- a/docs/my-website/release_notes/v1.59.0/index.md +++ b/docs/my-website/release_notes/v1.59.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.59.8-stable/index.md b/docs/my-website/release_notes/v1.59.8-stable/index.md index fa9825fb66..023f284ad5 100644 --- a/docs/my-website/release_notes/v1.59.8-stable/index.md +++ b/docs/my-website/release_notes/v1.59.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.61.20-stable/index.md b/docs/my-website/release_notes/v1.61.20-stable/index.md index 132c1aa318..5012e2aa90 100644 --- a/docs/my-website/release_notes/v1.61.20-stable/index.md +++ b/docs/my-website/release_notes/v1.61.20-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.0/index.md b/docs/my-website/release_notes/v1.63.0/index.md index e74a2f9b86..ab74b11b4d 100644 --- a/docs/my-website/release_notes/v1.63.0/index.md +++ b/docs/my-website/release_notes/v1.63.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.11-stable/index.md b/docs/my-website/release_notes/v1.63.11-stable/index.md index 91336fc681..882747a07b 100644 --- a/docs/my-website/release_notes/v1.63.11-stable/index.md +++ b/docs/my-website/release_notes/v1.63.11-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.14/index.md b/docs/my-website/release_notes/v1.63.14/index.md index aaeac639d4..ff2630468c 100644 --- a/docs/my-website/release_notes/v1.63.14/index.md +++ b/docs/my-website/release_notes/v1.63.14/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.2-stable/index.md b/docs/my-website/release_notes/v1.63.2-stable/index.md index 0c359452dc..3d47e02ac1 100644 --- a/docs/my-website/release_notes/v1.63.2-stable/index.md +++ b/docs/my-website/release_notes/v1.63.2-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.0-stable/index.md b/docs/my-website/release_notes/v1.65.0-stable/index.md index ec5f3bd441..3696f5023c 100644 --- a/docs/my-website/release_notes/v1.65.0-stable/index.md +++ b/docs/my-website/release_notes/v1.65.0-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.0/index.md b/docs/my-website/release_notes/v1.65.0/index.md index 46525ea55f..84276c997d 100644 --- a/docs/my-website/release_notes/v1.65.0/index.md +++ b/docs/my-website/release_notes/v1.65.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.4-stable/index.md b/docs/my-website/release_notes/v1.65.4-stable/index.md new file mode 100644 index 0000000000..872024a47a --- /dev/null +++ b/docs/my-website/release_notes/v1.65.4-stable/index.md @@ -0,0 +1,176 @@ +--- +title: v1.65.4-stable +slug: v1.65.4-stable +date: 2025-04-05T10:00:00 +authors: + - name: Krrish Dholakia + title: CEO, LiteLLM + url: 
https://www.linkedin.com/in/krish-d/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 + - name: Ishaan Jaffer + title: CTO, LiteLLM + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg + +tags: [] +hide_table_of_contents: false +--- + +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Deploy this version + + + + +``` showLineNumbers title="docker run litellm" +docker run +-e STORE_MODEL_IN_DB=True +-p 4000:4000 +ghcr.io/berriai/litellm:main-v1.65.4-stable +``` + + + + +``` showLineNumbers title="pip install litellm" +pip install litellm==1.65.4.post1 +``` + + + +v1.65.4-stable is live. Here are the improvements since v1.65.0-stable. + +## Key Highlights +- **Preventing DB Deadlocks**: Fixes a high-traffic issue when multiple instances were writing to the DB at the same time. +- **New Usage Tab**: Enables viewing spend by model and customizing date range + +Let's dive in. + +### Preventing DB Deadlocks + + + +This release fixes the DB deadlocking issue that users faced in high traffic (10K+ RPS). This is great because it enables user/key/team spend tracking works at that scale. + +Read more about the new architecture [here](https://docs.litellm.ai/docs/proxy/db_deadlocks) + + +### New Usage Tab + + + +The new Usage tab now brings the ability to track daily spend by model. This makes it easier to catch any spend tracking or token counting errors, when combined with the ability to view successful requests, and token usage. + +To test this out, just go to Experimental > New Usage > Activity. + + +## New Models / Updated Models + +1. Databricks - claude-3-7-sonnet cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L10350) +2. VertexAI - `gemini-2.5-pro-exp-03-25` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4492) +3. VertexAI - `gemini-2.0-flash` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4689) +4. Groq - add whisper ASR models to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L3324) +5. IBM - Add watsonx/ibm/granite-3-8b-instruct to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L91) +6. Google AI Studio - add gemini/gemini-2.5-pro-preview-03-25 to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4850) + +## LLM Translation +1. Vertex AI - Support anyOf param for OpenAI json schema translation [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema) +2. Anthropic- response_format + thinking param support (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/reasoning_content) +3. Anthropic - if thinking token is specified and max tokens is not - ensure max token to anthropic is higher than thinking tokens (works across Anthropic API, Bedrock, Vertex) [PR](https://github.com/BerriAI/litellm/pull/9594) +4. 
Bedrock - latency optimized inference support [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---latency-optimized-inference) +5. Sagemaker - handle special tokens + multibyte character code in response [Get Started](https://docs.litellm.ai/docs/providers/aws_sagemaker) +6. MCP - add support for using SSE MCP servers [Get Started](https://docs.litellm.ai/docs/mcp#usage) +8. Anthropic - new `litellm.messages.create` interface for calling Anthropic `/v1/messages` via passthrough [Get Started](https://docs.litellm.ai/docs/anthropic_unified#usage) +11. Anthropic - support ‘file’ content type in message param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---pdf) +12. Anthropic - map openai 'reasoning_effort' to anthropic 'thinking' param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content) +13. Google AI Studio (Gemini) - [BETA] `/v1/files` upload support [Get Started](../../docs/providers/google_ai_studio/files) +14. Azure - fix o-series tool calling [Get Started](../../docs/providers/azure#tool-calling--function-calling) +15. Unified file id - [ALPHA] allow calling multiple providers with same file id [PR](https://github.com/BerriAI/litellm/pull/9718) + - This is experimental, and not recommended for production use. + - We plan to have a production-ready implementation by next week. +16. Google AI Studio (Gemini) - return logprobs [PR](https://github.com/BerriAI/litellm/pull/9713) +17. Anthropic - Support prompt caching for Anthropic tool calls [Get Started](https://docs.litellm.ai/docs/completion/prompt_caching) +18. OpenRouter - unwrap extra body on open router calls [PR](https://github.com/BerriAI/litellm/pull/9747) +19. VertexAI - fix credential caching issue [PR](https://github.com/BerriAI/litellm/pull/9756) +20. XAI - filter out 'name' param for XAI [PR](https://github.com/BerriAI/litellm/pull/9761) +21. Gemini - image generation output support [Get Started](../../docs/providers/gemini#image-generation) +22. Databricks - support claude-3-7-sonnet w/ thinking + response_format [Get Started](../../docs/providers/databricks#usage---thinking--reasoning_content) + +## Spend Tracking Improvements +1. Reliability fix - Check sent and received model for cost calculation [PR](https://github.com/BerriAI/litellm/pull/9669) +2. Vertex AI - Multimodal embedding cost tracking [Get Started](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings), [PR](https://github.com/BerriAI/litellm/pull/9623) + +## Management Endpoints / UI + + + +1. New Usage Tab + - Report 'total_tokens' + report success/failure calls + - Remove double bars on scroll + - Ensure ‘daily spend’ chart ordered from earliest to latest date + - showing spend per model per day + - show key alias on usage tab + - Allow non-admins to view their activity + - Add date picker to new usage tab +2. Virtual Keys Tab + - remove 'default key' on user signup + - fix showing user models available for personal key creation +3. Test Key Tab + - Allow testing image generation models +4. Models Tab + - Fix bulk adding models + - support reusable credentials for passthrough endpoints + - Allow team members to see team models +5. Teams Tab + - Fix json serialization error on update team metadata +6. Request Logs Tab + - Add reasoning_content token tracking across all providers on streaming +7. 
API + - return key alias on /user/daily/activity [Get Started](../../docs/proxy/cost_tracking#daily-spend-breakdown-api) +8. SSO + - Allow assigning SSO users to teams on MSFT SSO [PR](https://github.com/BerriAI/litellm/pull/9745) + +## Logging / Guardrail Integrations + +1. Console Logs - Add json formatting for uncaught exceptions [PR](https://github.com/BerriAI/litellm/pull/9619) +2. Guardrails - AIM Guardrails support for virtual key based policies [Get Started](../../docs/proxy/guardrails/aim_security) +3. Logging - fix completion start time tracking [PR](https://github.com/BerriAI/litellm/pull/9688) +4. Prometheus + - Allow adding authentication on Prometheus /metrics endpoints [PR](https://github.com/BerriAI/litellm/pull/9766) + - Distinguish LLM Provider Exception vs. LiteLLM Exception in metric naming [PR](https://github.com/BerriAI/litellm/pull/9760) + - Emit operational metrics for new DB Transaction architecture [PR](https://github.com/BerriAI/litellm/pull/9719) + +## Performance / Loadbalancing / Reliability improvements +1. Preventing Deadlocks + - Reduce DB Deadlocks by storing spend updates in Redis and then committing to DB [PR](https://github.com/BerriAI/litellm/pull/9608) + - Ensure no deadlocks occur when updating DailyUserSpendTransaction [PR](https://github.com/BerriAI/litellm/pull/9690) + - High Traffic fix - ensure new DB + Redis architecture accurately tracks spend [PR](https://github.com/BerriAI/litellm/pull/9673) + - Use Redis for PodLock Manager instead of PG (ensures no deadlocks occur) [PR](https://github.com/BerriAI/litellm/pull/9715) + - v2 DB Deadlock Reduction Architecture – Add Max Size for In-Memory Queue + Backpressure Mechanism [PR](https://github.com/BerriAI/litellm/pull/9759) + +2. Prisma Migrations [Get Started](../../docs/proxy/prod#9-use-prisma-migrate-deploy) + - connects litellm proxy to litellm's prisma migration files + - Handle db schema updates from new `litellm-proxy-extras` sdk +3. Redis - support password for sync sentinel clients [PR](https://github.com/BerriAI/litellm/pull/9622) +4. Fix "Circular reference detected" error when max_parallel_requests = 0 [PR](https://github.com/BerriAI/litellm/pull/9671) +5. Code QA - Ban hardcoded numbers [PR](https://github.com/BerriAI/litellm/pull/9709) + +## Helm +1. fix: wrong indentation of ttlSecondsAfterFinished in chart [PR](https://github.com/BerriAI/litellm/pull/9611) + +## General Proxy Improvements +1. Fix - only apply service_account_settings.enforced_params on service accounts [PR](https://github.com/BerriAI/litellm/pull/9683) +2. Fix - handle metadata null on `/chat/completion` [PR](https://github.com/BerriAI/litellm/issues/9717) +3. 
Fix - Move daily user transaction logging outside of 'disable_spend_logs' flag, as they’re unrelated [PR](https://github.com/BerriAI/litellm/pull/9772) + +## Demo + +Try this on the demo instance [today](https://docs.litellm.ai/docs/proxy/demo) + +## Complete Git Diff + +See the complete git diff since v1.65.0-stable, [here](https://github.com/BerriAI/litellm/releases/tag/v1.65.4-stable) + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1542edceb5..b8591cb993 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -53,7 +53,7 @@ const sidebars = { { type: "category", label: "Architecture", - items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"], + items: ["proxy/architecture", "proxy/db_info", "proxy/db_deadlocks", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"], }, { type: "link", @@ -188,7 +188,15 @@ const sidebars = { "providers/azure_ai", "providers/aiml", "providers/vertex", - "providers/gemini", + + { + type: "category", + label: "Google AI Studio", + items: [ + "providers/gemini", + "providers/google_ai_studio/files", + ] + }, "providers/anthropic", "providers/aws_sagemaker", "providers/bedrock", diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3-py3-none-any.whl b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3-py3-none-any.whl new file mode 100644 index 0000000000..12f72a933f Binary files /dev/null and b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3-py3-none-any.whl differ diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3.tar.gz b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3.tar.gz new file mode 100644 index 0000000000..590be31628 Binary files /dev/null and b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3.tar.gz differ diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma new file mode 100644 index 0000000000..faf110ca96 --- /dev/null +++ b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma @@ -0,0 +1,356 @@ +datasource client { + provider = "postgresql" + url = env("DATABASE_URL") +} + +generator client { + provider = "prisma-client-py" +} + +// Budget / Rate Limits for an org +model LiteLLM_BudgetTable { + budget_id String @id @default(uuid()) + max_budget Float? + soft_budget Float? + max_parallel_requests Int? + tpm_limit BigInt? + rpm_limit BigInt? + model_max_budget Json? + budget_duration String? + budget_reset_at DateTime? + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String + organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget + keys LiteLLM_VerificationToken[] // multiple keys can have the same budget + end_users LiteLLM_EndUserTable[] // multiple end-users can have the same budget + team_membership LiteLLM_TeamMembership[] // budgets of Users within a Team + organization_membership LiteLLM_OrganizationMembership[] // budgets of Users within a Organization +} + +// Models on proxy +model LiteLLM_CredentialsTable { + credential_id String @id @default(uuid()) + credential_name String @unique + credential_values Json + credential_info Json? 
+ created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String +} + +// Models on proxy +model LiteLLM_ProxyModelTable { + model_id String @id @default(uuid()) + model_name String + litellm_params Json + model_info Json? + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String +} + +model LiteLLM_OrganizationTable { + organization_id String @id @default(uuid()) + organization_alias String + budget_id String + metadata Json @default("{}") + models String[] + spend Float @default(0.0) + model_spend Json @default("{}") + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + teams LiteLLM_TeamTable[] + users LiteLLM_UserTable[] + keys LiteLLM_VerificationToken[] + members LiteLLM_OrganizationMembership[] @relation("OrganizationToMembership") +} + +// Model info for teams, just has model aliases for now. +model LiteLLM_ModelTable { + id Int @id @default(autoincrement()) + model_aliases Json? @map("aliases") + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String + team LiteLLM_TeamTable? +} + + +// Assign prod keys to groups, not individuals +model LiteLLM_TeamTable { + team_id String @id @default(uuid()) + team_alias String? + organization_id String? + admins String[] + members String[] + members_with_roles Json @default("{}") + metadata Json @default("{}") + max_budget Float? + spend Float @default(0.0) + models String[] + max_parallel_requests Int? + tpm_limit BigInt? + rpm_limit BigInt? + budget_duration String? + budget_reset_at DateTime? + blocked Boolean @default(false) + created_at DateTime @default(now()) @map("created_at") + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + model_spend Json @default("{}") + model_max_budget Json @default("{}") + model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases + litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) + litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id]) +} + +// Track spend, rate limit, budget Users +model LiteLLM_UserTable { + user_id String @id + user_alias String? + team_id String? + sso_user_id String? @unique + organization_id String? + password String? + teams String[] @default([]) + user_role String? + max_budget Float? + spend Float @default(0.0) + user_email String? + models String[] + metadata Json @default("{}") + max_parallel_requests Int? + tpm_limit BigInt? + rpm_limit BigInt? + budget_duration String? + budget_reset_at DateTime? + allowed_cache_controls String[] @default([]) + model_spend Json @default("{}") + model_max_budget Json @default("{}") + created_at DateTime? @default(now()) @map("created_at") + updated_at DateTime? @default(now()) @updatedAt @map("updated_at") + + // relations + litellm_organization_table LiteLLM_OrganizationTable? 
@relation(fields: [organization_id], references: [organization_id]) + organization_memberships LiteLLM_OrganizationMembership[] + invitations_created LiteLLM_InvitationLink[] @relation("CreatedBy") + invitations_updated LiteLLM_InvitationLink[] @relation("UpdatedBy") + invitations_user LiteLLM_InvitationLink[] @relation("UserId") +} + +// Generate Tokens for Proxy +model LiteLLM_VerificationToken { + token String @id + key_name String? + key_alias String? + soft_budget_cooldown Boolean @default(false) // key-level state on if budget alerts need to be cooled down + spend Float @default(0.0) + expires DateTime? + models String[] + aliases Json @default("{}") + config Json @default("{}") + user_id String? + team_id String? + permissions Json @default("{}") + max_parallel_requests Int? + metadata Json @default("{}") + blocked Boolean? + tpm_limit BigInt? + rpm_limit BigInt? + max_budget Float? + budget_duration String? + budget_reset_at DateTime? + allowed_cache_controls String[] @default([]) + model_spend Json @default("{}") + model_max_budget Json @default("{}") + budget_id String? + organization_id String? + created_at DateTime? @default(now()) @map("created_at") + created_by String? + updated_at DateTime? @default(now()) @updatedAt @map("updated_at") + updated_by String? + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) +} + +model LiteLLM_EndUserTable { + user_id String @id + alias String? // admin-facing alias + spend Float @default(0.0) + allowed_model_region String? // require all user requests to use models in this specific region + default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model. + budget_id String? + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + blocked Boolean @default(false) +} + +// store proxy config.yaml +model LiteLLM_Config { + param_name String @id + param_value Json? +} + +// View spend, model, api_key per request +model LiteLLM_SpendLogs { + request_id String @id + call_type String + api_key String @default ("") // Hashed API Token. Not the actual Virtual Key. Equivalent to 'token' column in LiteLLM_VerificationToken + spend Float @default(0.0) + total_tokens Int @default(0) + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + startTime DateTime // Assuming start_time is a DateTime field + endTime DateTime // Assuming end_time is a DateTime field + completionStartTime DateTime? // Assuming completionStartTime is a DateTime field + model String @default("") + model_id String? @default("") // the model id stored in proxy model db + model_group String? @default("") // public model_name / model_group + custom_llm_provider String? @default("") // litellm used custom_llm_provider + api_base String? @default("") + user String? @default("") + metadata Json? @default("{}") + cache_hit String? @default("") + cache_key String? @default("") + request_tags Json? @default("[]") + team_id String? + end_user String? + requester_ip_address String? + messages Json? @default("{}") + response Json? 
@default("{}") + @@index([startTime]) + @@index([end_user]) +} + +// View spend, model, api_key per request +model LiteLLM_ErrorLogs { + request_id String @id @default(uuid()) + startTime DateTime // Assuming start_time is a DateTime field + endTime DateTime // Assuming end_time is a DateTime field + api_base String @default("") + model_group String @default("") // public model_name / model_group + litellm_model_name String @default("") // model passed to litellm + model_id String @default("") // ID of model in ProxyModelTable + request_kwargs Json @default("{}") + exception_type String @default("") + exception_string String @default("") + status_code String @default("") +} + +// Beta - allow team members to request access to a model +model LiteLLM_UserNotifications { + request_id String @id + user_id String + models String[] + justification String + status String // approved, disapproved, pending +} + +model LiteLLM_TeamMembership { + // Use this table to track the Internal User's Spend within a Team + Set Budgets, rpm limits for the user within the team + user_id String + team_id String + spend Float @default(0.0) + budget_id String? + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + @@id([user_id, team_id]) +} + +model LiteLLM_OrganizationMembership { + // Use this table to track Internal User and Organization membership. Helps tracking a users role within an Organization + user_id String + organization_id String + user_role String? + spend Float? @default(0.0) + budget_id String? + created_at DateTime? @default(now()) @map("created_at") + updated_at DateTime? @default(now()) @updatedAt @map("updated_at") + + // relations + user LiteLLM_UserTable @relation(fields: [user_id], references: [user_id]) + organization LiteLLM_OrganizationTable @relation("OrganizationToMembership", fields: [organization_id], references: [organization_id]) + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + + + + @@id([user_id, organization_id]) + @@unique([user_id, organization_id]) +} + +model LiteLLM_InvitationLink { + // use this table to track invite links sent by admin for people to join the proxy + id String @id @default(uuid()) + user_id String + is_accepted Boolean @default(false) + accepted_at DateTime? // when link is claimed (user successfully onboards via link) + expires_at DateTime // till when is link valid + created_at DateTime // when did admin create the link + created_by String // who created the link + updated_at DateTime // when was invite status updated + updated_by String // who updated the status (admin/user who accepted invite) + + // Relations + liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id]) + liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id]) + liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id]) +} + + +model LiteLLM_AuditLog { + id String @id @default(uuid()) + updated_at DateTime @default(now()) + changed_by String @default("") // user or system that performed the action + changed_by_api_key String @default("") // api key hash that performed the action + action String // create, update, delete + table_name String // on of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME, + object_id String // id of the object being audited. 
This can be the key id, team id, user id, model id + before_value Json? // value of the row + updated_values Json? // value of the row after change +} + +// Track daily user spend metrics per model and key +model LiteLLM_DailyUserSpend { + id String @id @default(uuid()) + user_id String + date String + api_key String + model String + model_group String? + custom_llm_provider String? + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + spend Float @default(0.0) + api_requests Int @default(0) + successful_requests Int @default(0) + failed_requests Int @default(0) + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([user_id, date, api_key, model, custom_llm_provider]) + @@index([date]) + @@index([user_id]) + @@index([api_key]) + @@index([model]) +} + + +// Track the status of cron jobs running. Only allow one pod to run the job at a time +model LiteLLM_CronJob { + cronjob_id String @id @default(cuid()) // Unique ID for the record + pod_id String // Unique identifier for the pod acting as the leader + status JobStatus @default(INACTIVE) // Status of the cron job (active or inactive) + last_updated DateTime @default(now()) // Timestamp for the last update of the cron job record + ttl DateTime // Time when the leader's lease expires +} + +enum JobStatus { + ACTIVE + INACTIVE +} + diff --git a/litellm-proxy-extras/litellm_proxy_extras/utils.py b/litellm-proxy-extras/litellm_proxy_extras/utils.py index 894ae34122..cd9beeb753 100644 --- a/litellm-proxy-extras/litellm_proxy_extras/utils.py +++ b/litellm-proxy-extras/litellm_proxy_extras/utils.py @@ -30,21 +30,23 @@ class ProxyExtrasDBManager: use_migrate = str_to_bool(os.getenv("USE_PRISMA_MIGRATE")) or use_migrate for attempt in range(4): original_dir = os.getcwd() - schema_dir = os.path.dirname(schema_path) - os.chdir(schema_dir) + migrations_dir = os.path.dirname(__file__) + os.chdir(migrations_dir) try: if use_migrate: logger.info("Running prisma migrate deploy") try: # Set migrations directory for Prisma - subprocess.run( + result = subprocess.run( ["prisma", "migrate", "deploy"], timeout=60, check=True, capture_output=True, text=True, ) + logger.info(f"prisma migrate deploy stdout: {result.stdout}") + logger.info("prisma migrate deploy completed") return True except subprocess.CalledProcessError as e: @@ -77,4 +79,5 @@ class ProxyExtrasDBManager: time.sleep(random.randrange(5, 15)) finally: os.chdir(original_dir) + pass return False diff --git a/litellm-proxy-extras/pyproject.toml b/litellm-proxy-extras/pyproject.toml index aea27371fe..21484d5097 100644 --- a/litellm-proxy-extras/pyproject.toml +++ b/litellm-proxy-extras/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm-proxy-extras" -version = "0.1.2" +version = "0.1.3" description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package." 
authors = ["BerriAI"] readme = "README.md" @@ -22,7 +22,7 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "0.1.2" +version = "0.1.3" version_files = [ "pyproject.toml:version", "../requirements.txt:litellm-proxy-extras==", diff --git a/litellm/__init__.py b/litellm/__init__.py index 9997b9a8ac..e061643398 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -56,6 +56,9 @@ from litellm.constants import ( bedrock_embedding_models, known_tokenizer_config, BEDROCK_INVOKE_PROVIDERS_LITERAL, + DEFAULT_MAX_TOKENS, + DEFAULT_SOFT_BUDGET, + DEFAULT_ALLOWED_FAILS, ) from litellm.types.guardrails import GuardrailItem from litellm.proxy._types import ( @@ -120,6 +123,7 @@ callbacks: List[ langfuse_default_tags: Optional[List[str]] = None langsmith_batch_size: Optional[int] = None prometheus_initialize_budget_metrics: Optional[bool] = False +require_auth_for_metrics_endpoint: Optional[bool] = False argilla_batch_size: Optional[int] = None datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload gcs_pub_sub_use_v1: Optional[ @@ -155,7 +159,7 @@ token: Optional[ str ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 telemetry = True -max_tokens = 256 # OpenAI Defaults +max_tokens: int = DEFAULT_MAX_TOKENS # OpenAI Defaults drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False)) modify_params = False retry = True @@ -244,7 +248,7 @@ budget_duration: Optional[ str ] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). default_soft_budget: float = ( - 50.0 # by default all litellm proxy keys have a soft budget of 50.0 + DEFAULT_SOFT_BUDGET # by default all litellm proxy keys have a soft budget of 50.0 ) forward_traceparent_to_llm_provider: bool = False @@ -796,9 +800,8 @@ from .llms.aiohttp_openai.chat.transformation import AiohttpOpenAIChatConfig from .llms.galadriel.chat.transformation import GaladrielChatConfig from .llms.github.chat.transformation import GithubChatConfig from .llms.empower.chat.transformation import EmpowerChatConfig -from .llms.huggingface.chat.transformation import ( - HuggingfaceChatConfig as HuggingfaceConfig, -) +from .llms.huggingface.chat.transformation import HuggingFaceChatConfig +from .llms.huggingface.embedding.transformation import HuggingFaceEmbeddingConfig from .llms.oobabooga.chat.transformation import OobaboogaConfig from .llms.maritalk import MaritalkConfig from .llms.openrouter.chat.transformation import OpenrouterConfig diff --git a/litellm/_redis.py b/litellm/_redis.py index b2624d4280..14813c436e 100644 --- a/litellm/_redis.py +++ b/litellm/_redis.py @@ -18,6 +18,7 @@ import redis # type: ignore import redis.asyncio as async_redis # type: ignore from litellm import get_secret, get_secret_str +from litellm.constants import REDIS_CONNECTION_POOL_TIMEOUT, REDIS_SOCKET_TIMEOUT from ._logging import verbose_logger @@ -215,7 +216,7 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis: # Set up the Sentinel client sentinel = redis.Sentinel( sentinel_nodes, - socket_timeout=0.1, + socket_timeout=REDIS_SOCKET_TIMEOUT, password=sentinel_password, ) @@ -239,7 +240,7 @@ def _init_async_redis_sentinel(redis_kwargs) -> async_redis.Redis: # Set up the Sentinel client sentinel = async_redis.Sentinel( sentinel_nodes, - socket_timeout=0.1, + socket_timeout=REDIS_SOCKET_TIMEOUT, password=sentinel_password, ) @@ -319,7 +320,7 @@ 
def get_redis_connection_pool(**env_overrides): verbose_logger.debug("get_redis_connection_pool: redis_kwargs", redis_kwargs) if "url" in redis_kwargs and redis_kwargs["url"] is not None: return async_redis.BlockingConnectionPool.from_url( - timeout=5, url=redis_kwargs["url"] + timeout=REDIS_CONNECTION_POOL_TIMEOUT, url=redis_kwargs["url"] ) connection_class = async_redis.Connection if "ssl" in redis_kwargs: @@ -327,4 +328,6 @@ def get_redis_connection_pool(**env_overrides): redis_kwargs.pop("ssl", None) redis_kwargs["connection_class"] = connection_class redis_kwargs.pop("startup_nodes", None) - return async_redis.BlockingConnectionPool(timeout=5, **redis_kwargs) + return async_redis.BlockingConnectionPool( + timeout=REDIS_CONNECTION_POOL_TIMEOUT, **redis_kwargs + ) diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index 8f835bea83..7a60359d54 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -124,6 +124,7 @@ class ServiceLogging(CustomLogger): service=service, duration=duration, call_type=call_type, + event_metadata=event_metadata, ) for callback in litellm.service_callback: @@ -229,6 +230,7 @@ class ServiceLogging(CustomLogger): service=service, duration=duration, call_type=call_type, + event_metadata=event_metadata, ) for callback in litellm.service_callback: diff --git a/litellm/budget_manager.py b/litellm/budget_manager.py index e664c4f44f..b25967579e 100644 --- a/litellm/budget_manager.py +++ b/litellm/budget_manager.py @@ -14,6 +14,12 @@ import time from typing import Literal, Optional import litellm +from litellm.constants import ( + DAYS_IN_A_MONTH, + DAYS_IN_A_WEEK, + DAYS_IN_A_YEAR, + HOURS_IN_A_DAY, +) from litellm.utils import ModelResponse @@ -81,11 +87,11 @@ class BudgetManager: if duration == "daily": duration_in_days = 1 elif duration == "weekly": - duration_in_days = 7 + duration_in_days = DAYS_IN_A_WEEK elif duration == "monthly": - duration_in_days = 28 + duration_in_days = DAYS_IN_A_MONTH elif duration == "yearly": - duration_in_days = 365 + duration_in_days = DAYS_IN_A_YEAR else: raise ValueError( """duration needs to be one of ["daily", "weekly", "monthly", "yearly"]""" @@ -182,7 +188,9 @@ class BudgetManager: current_time = time.time() # Convert duration from days to seconds - duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60 + duration_in_seconds = ( + self.user_dict[user]["duration"] * HOURS_IN_A_DAY * 60 * 60 + ) # Check if duration has elapsed if current_time - last_updated_at >= duration_in_seconds: diff --git a/litellm/caching/caching.py b/litellm/caching/caching.py index affb8e3855..6a7c93e3fe 100644 --- a/litellm/caching/caching.py +++ b/litellm/caching/caching.py @@ -19,6 +19,7 @@ from pydantic import BaseModel import litellm from litellm._logging import verbose_logger +from litellm.constants import CACHED_STREAMING_CHUNK_DELAY from litellm.litellm_core_utils.model_param_helper import ModelParamHelper from litellm.types.caching import * from litellm.types.utils import all_litellm_params @@ -406,7 +407,7 @@ class Cache: } ] } - time.sleep(0.02) + time.sleep(CACHED_STREAMING_CHUNK_DELAY) def _get_cache_logic( self, diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 5e09fe845f..e3d757d08d 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -15,7 +15,8 @@ from typing import Any, List, Optional from pydantic import BaseModel -from ..constants import MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB +from litellm.constants import 
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB + from .base_cache import BaseCache @@ -52,7 +53,8 @@ class InMemoryCache(BaseCache): # Fast path for common primitive types that are typically small if ( isinstance(value, (bool, int, float, str)) - and len(str(value)) < self.max_size_per_item * 512 + and len(str(value)) + < self.max_size_per_item * MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB ): # Conservative estimate return True diff --git a/litellm/caching/qdrant_semantic_cache.py b/litellm/caching/qdrant_semantic_cache.py index bdfd3770ae..32d4d8b0fd 100644 --- a/litellm/caching/qdrant_semantic_cache.py +++ b/litellm/caching/qdrant_semantic_cache.py @@ -11,10 +11,12 @@ Has 4 methods: import ast import asyncio import json -from typing import Any +from typing import Any, cast import litellm from litellm._logging import print_verbose +from litellm.constants import QDRANT_SCALAR_QUANTILE, QDRANT_VECTOR_SIZE +from litellm.types.utils import EmbeddingResponse from .base_cache import BaseCache @@ -118,7 +120,11 @@ class QdrantSemanticCache(BaseCache): } elif quantization_config == "scalar": quantization_params = { - "scalar": {"type": "int8", "quantile": 0.99, "always_ram": False} + "scalar": { + "type": "int8", + "quantile": QDRANT_SCALAR_QUANTILE, + "always_ram": False, + } } elif quantization_config == "product": quantization_params = { @@ -132,7 +138,7 @@ class QdrantSemanticCache(BaseCache): new_collection_status = self.sync_client.put( url=f"{self.qdrant_api_base}/collections/{self.collection_name}", json={ - "vectors": {"size": 1536, "distance": "Cosine"}, + "vectors": {"size": QDRANT_VECTOR_SIZE, "distance": "Cosine"}, "quantization_config": quantization_params, }, headers=self.headers, @@ -171,10 +177,13 @@ class QdrantSemanticCache(BaseCache): prompt += message["content"] # create an embedding for prompt - embedding_response = litellm.embedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, + embedding_response = cast( + EmbeddingResponse, + litellm.embedding( + model=self.embedding_model, + input=prompt, + cache={"no-store": True, "no-cache": True}, + ), ) # get the embedding @@ -212,10 +221,13 @@ class QdrantSemanticCache(BaseCache): prompt += message["content"] # convert to embedding - embedding_response = litellm.embedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, + embedding_response = cast( + EmbeddingResponse, + litellm.embedding( + model=self.embedding_model, + input=prompt, + cache={"no-store": True, "no-cache": True}, + ), ) # get the embedding diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py index 63cd4d0959..31e11abf97 100644 --- a/litellm/caching/redis_cache.py +++ b/litellm/caching/redis_cache.py @@ -304,12 +304,18 @@ class RedisCache(BaseCache): key = self.check_and_fix_namespace(key=key) ttl = self.get_ttl(**kwargs) + nx = kwargs.get("nx", False) print_verbose(f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}") try: if not hasattr(_redis_client, "set"): raise Exception("Redis client cannot set cache. 
Attribute not found.") - await _redis_client.set(name=key, value=json.dumps(value), ex=ttl) + result = await _redis_client.set( + name=key, + value=json.dumps(value), + nx=nx, + ex=ttl, + ) print_verbose( f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}" ) @@ -326,6 +332,7 @@ class RedisCache(BaseCache): event_metadata={"key": key}, ) ) + return result except Exception as e: end_time = time.time() _duration = end_time - start_time @@ -931,7 +938,7 @@ class RedisCache(BaseCache): # typed as Any, redis python lib has incomplete type stubs for RedisCluster and does not include `delete` _redis_client: Any = self.init_async_client() # keys is str - await _redis_client.delete(key) + return await _redis_client.delete(key) def delete_cache(self, key): self.redis_client.delete(key) diff --git a/litellm/constants.py b/litellm/constants.py index cace674f2f..c8248f548a 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -9,6 +9,7 @@ DEFAULT_FAILURE_THRESHOLD_PERCENT = ( 0.5 # default cooldown a deployment if 50% of requests fail in a given minute ) DEFAULT_MAX_TOKENS = 4096 +DEFAULT_ALLOWED_FAILS = 3 DEFAULT_REDIS_SYNC_INTERVAL = 1 DEFAULT_COOLDOWN_TIME_SECONDS = 5 DEFAULT_REPLICATE_POLLING_RETRIES = 5 @@ -16,16 +17,76 @@ DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1 DEFAULT_IMAGE_TOKEN_COUNT = 250 DEFAULT_IMAGE_WIDTH = 300 DEFAULT_IMAGE_HEIGHT = 300 +DEFAULT_MAX_TOKENS = 256 # used when providers need a default MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic. + +########### v2 Architecture constants for managing writing updates to the database ########### REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer" REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer" MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100 +MAX_SIZE_IN_MEMORY_QUEUE = 10000 +MAX_IN_MEMORY_QUEUE_FLUSH_COUNT = 1000 +############################################################################################### +MINIMUM_PROMPT_CACHE_TOKEN_COUNT = ( + 1024 # minimum number of tokens to cache a prompt by Anthropic +) +DEFAULT_TRIM_RATIO = 0.75 # default ratio of tokens to trim from the end of a prompt +HOURS_IN_A_DAY = 24 +DAYS_IN_A_WEEK = 7 +DAYS_IN_A_MONTH = 28 +DAYS_IN_A_YEAR = 365 +REPLICATE_MODEL_NAME_WITH_ID_LENGTH = 64 +#### TOKEN COUNTING #### +FUNCTION_DEFINITION_TOKEN_COUNT = 9 +SYSTEM_MESSAGE_TOKEN_COUNT = 4 +TOOL_CHOICE_OBJECT_TOKEN_COUNT = 4 +DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT = 10 +DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT = 20 +MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES = 768 +MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES = 2000 +MAX_TILE_WIDTH = 512 +MAX_TILE_HEIGHT = 512 +OPENAI_FILE_SEARCH_COST_PER_1K_CALLS = 2.5 / 1000 +MIN_NON_ZERO_TEMPERATURE = 0.0001 #### RELIABILITY #### REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives. +DEFAULT_MAX_LRU_CACHE_SIZE = 16 +INITIAL_RETRY_DELAY = 0.5 +MAX_RETRY_DELAY = 8.0 +JITTER = 0.75 +DEFAULT_IN_MEMORY_TTL = 5 # default time to live for the in-memory cache +DEFAULT_POLLING_INTERVAL = 0.03 # default polling interval for the scheduler +AZURE_OPERATION_POLLING_TIMEOUT = 120 +REDIS_SOCKET_TIMEOUT = 0.1 +REDIS_CONNECTION_POOL_TIMEOUT = 5 +NON_LLM_CONNECTION_TIMEOUT = 15 # timeout for adjacent services (e.g. 
jwt auth) +MAX_EXCEPTION_MESSAGE_LENGTH = 2000 +BEDROCK_MAX_POLICY_SIZE = 75 +REPLICATE_POLLING_DELAY_SECONDS = 0.5 +DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS = 4096 +TOGETHER_AI_4_B = 4 +TOGETHER_AI_8_B = 8 +TOGETHER_AI_21_B = 21 +TOGETHER_AI_41_B = 41 +TOGETHER_AI_80_B = 80 +TOGETHER_AI_110_B = 110 +TOGETHER_AI_EMBEDDING_150_M = 150 +TOGETHER_AI_EMBEDDING_350_M = 350 +QDRANT_SCALAR_QUANTILE = 0.99 +QDRANT_VECTOR_SIZE = 1536 +CACHED_STREAMING_CHUNK_DELAY = 0.02 +MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 512 +DEFAULT_MAX_TOKENS_FOR_TRITON = 2000 #### Networking settings #### request_timeout: float = 6000 # time in seconds STREAM_SSE_DONE_STRING: str = "[DONE]" +### SPEND TRACKING ### +DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400 # price per second for a100 80GB +FIREWORKS_AI_56_B_MOE = 56 +FIREWORKS_AI_176_B_MOE = 176 +FIREWORKS_AI_16_B = 16 +FIREWORKS_AI_80_B = 80 LITELLM_CHAT_PROVIDERS = [ "openai", @@ -426,6 +487,9 @@ MCP_TOOL_NAME_PREFIX = "mcp_tool" MAX_SPENDLOG_ROWS_TO_QUERY = ( 1_000_000 # if spendLogs has more than 1M rows, do not query the DB ) +DEFAULT_SOFT_BUDGET = ( + 50.0 # by default all litellm proxy keys have a soft budget of 50.0 +) # makes it clear this is a rate limit error for a litellm virtual key RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash" @@ -451,3 +515,14 @@ LITELLM_PROXY_ADMIN_NAME = "default_user_id" ########################### DB CRON JOB NAMES ########################### DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job" DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute +PROXY_BUDGET_RESCHEDULER_MIN_TIME = 597 +PROXY_BUDGET_RESCHEDULER_MAX_TIME = 605 +PROXY_BATCH_WRITE_AT = 10 # in seconds +DEFAULT_HEALTH_CHECK_INTERVAL = 300 # 5 minutes +PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS = 9 +DEFAULT_MODEL_CREATED_AT_TIME = 1677610602 # returns on `/models` endpoint +DEFAULT_SLACK_ALERTING_THRESHOLD = 300 +MAX_TEAM_LIST_LIMIT = 20 +DEFAULT_PROMPT_INJECTION_SIMILARITY_THRESHOLD = 0.7 +LENGTH_OF_LITELLM_GENERATED_KEY = 16 +SECRET_MANAGER_REFRESH_INTERVAL = 86400 diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index de12698658..98c73a4ce7 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -9,6 +9,10 @@ from pydantic import BaseModel import litellm import litellm._logging from litellm import verbose_logger +from litellm.constants import ( + DEFAULT_MAX_LRU_CACHE_SIZE, + DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND, +) from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import ( StandardBuiltInToolCostTracking, ) @@ -355,9 +359,7 @@ def cost_per_token( # noqa: PLR0915 def get_replicate_completion_pricing(completion_response: dict, total_time=0.0): # see https://replicate.com/pricing # for all litellm currently supported LLMs, almost all requests go to a100_80gb - a100_80gb_price_per_second_public = ( - 0.001400 # assume all calls sent to A100 80GB for now - ) + a100_80gb_price_per_second_public = DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND # assume all calls sent to A100 80GB for now if total_time == 0.0: # total time is in ms start_time = completion_response.get("created", time.time()) end_time = getattr(completion_response, "ended", time.time()) @@ -450,7 +452,7 @@ def _select_model_name_for_cost_calc( return return_model -@lru_cache(maxsize=16) +@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE) def _model_contains_known_llm_provider(model: str) -> bool: """ Check if the model contains a known llm provider diff --git a/litellm/files/main.py b/litellm/files/main.py index 
cdc3115a6f..7516088f83 100644 --- a/litellm/files/main.py +++ b/litellm/files/main.py @@ -63,16 +63,17 @@ async def acreate_file( loop = asyncio.get_event_loop() kwargs["acreate_file"] = True - # Use a partial function to pass your keyword arguments - func = partial( - create_file, - file, - purpose, - custom_llm_provider, - extra_headers, - extra_body, + call_args = { + "file": file, + "purpose": purpose, + "custom_llm_provider": custom_llm_provider, + "extra_headers": extra_headers, + "extra_body": extra_body, **kwargs, - ) + } + + # Use a partial function to pass your keyword arguments + func = partial(create_file, **call_args) # Add the context to the function ctx = contextvars.copy_context() @@ -92,7 +93,7 @@ async def acreate_file( def create_file( file: FileTypes, purpose: Literal["assistants", "batch", "fine-tune"], - custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai", + custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai"]] = None, extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, @@ -101,6 +102,8 @@ def create_file( Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API. LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files + + Specify either provider_list or custom_llm_provider. """ try: _is_async = kwargs.pop("acreate_file", False) is True @@ -120,7 +123,7 @@ def create_file( if ( timeout is not None and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) is False + and supports_httpx_timeout(cast(str, custom_llm_provider)) is False ): read_timeout = timeout.read or 600 timeout = read_timeout # default 10 min timeout diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index 50f0538cfd..9fde042ae7 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -16,6 +16,7 @@ import litellm.litellm_core_utils.litellm_logging import litellm.types from litellm._logging import verbose_logger, verbose_proxy_logger from litellm.caching.caching import DualCache +from litellm.constants import HOURS_IN_A_DAY from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.litellm_core_utils.duration_parser import duration_in_seconds from litellm.litellm_core_utils.exception_mapping_utils import ( @@ -649,10 +650,10 @@ class SlackAlerting(CustomBatchLogger): event_message += ( f"Budget Crossed\n Total Budget:`{user_info.max_budget}`" ) - elif percent_left <= 0.05: + elif percent_left <= SLACK_ALERTING_THRESHOLD_5_PERCENT: event = "threshold_crossed" event_message += "5% Threshold Crossed " - elif percent_left <= 0.15: + elif percent_left <= SLACK_ALERTING_THRESHOLD_15_PERCENT: event = "threshold_crossed" event_message += "15% Threshold Crossed" elif user_info.soft_budget is not None: @@ -1718,7 +1719,7 @@ Model Info: await self.internal_usage_cache.async_set_cache( key=_event_cache_key, value="SENT", - ttl=(30 * 24 * 60 * 60), # 1 month + ttl=(30 * HOURS_IN_A_DAY * 60 * 60), # 1 month ) except Exception as e: diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index e9b6b6b164..fb6fee6dc6 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -41,7 +41,7 @@ from litellm.types.utils import StandardLoggingPayload from ..additional_logging_utils import 
AdditionalLoggingUtils # max number of logs DD API can accept -DD_MAX_BATCH_SIZE = 1000 + # specify what ServiceTypes are logged as success events to DD. (We don't want to spam DD traces with large number of service types) DD_LOGGED_SUCCESS_SERVICE_TYPES = [ diff --git a/litellm/integrations/gcs_bucket/gcs_bucket.py b/litellm/integrations/gcs_bucket/gcs_bucket.py index 187ab779c0..972a023666 100644 --- a/litellm/integrations/gcs_bucket/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket/gcs_bucket.py @@ -20,10 +20,6 @@ else: VertexBase = Any -GCS_DEFAULT_BATCH_SIZE = 2048 -GCS_DEFAULT_FLUSH_INTERVAL_SECONDS = 20 - - class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils): def __init__(self, bucket_name: Optional[str] = None) -> None: from litellm.proxy.proxy_server import premium_user @@ -125,6 +121,7 @@ class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils): gcs_logging_config: GCSLoggingConfig = await self.get_gcs_logging_config( kwargs ) + headers = await self.construct_request_headers( vertex_instance=gcs_logging_config["vertex_instance"], service_account_json=gcs_logging_config["path_service_account"], diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 5ac8c80eb3..6fba69d005 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -818,7 +818,7 @@ class PrometheusLogger(CustomLogger): requested_model=request_data.get("model", ""), status_code=str(getattr(original_exception, "status_code", None)), exception_status=str(getattr(original_exception, "status_code", None)), - exception_class=str(original_exception.__class__.__name__), + exception_class=self._get_exception_class_name(original_exception), tags=_tags, ) _labels = prometheus_label_factory( @@ -917,7 +917,7 @@ class PrometheusLogger(CustomLogger): api_base=api_base, api_provider=llm_provider, exception_status=str(getattr(exception, "status_code", None)), - exception_class=exception.__class__.__name__, + exception_class=self._get_exception_class_name(exception), requested_model=model_group, hashed_api_key=standard_logging_payload["metadata"][ "user_api_key_hash" @@ -1146,6 +1146,22 @@ class PrometheusLogger(CustomLogger): ) return + @staticmethod + def _get_exception_class_name(exception: Exception) -> str: + exception_class_name = "" + if hasattr(exception, "llm_provider"): + exception_class_name = getattr(exception, "llm_provider") or "" + + # pretty print the provider name on prometheus + # eg. `openai` -> `Openai.` + if len(exception_class_name) >= 1: + exception_class_name = ( + exception_class_name[0].upper() + exception_class_name[1:] + "." 
+ ) + + exception_class_name += exception.__class__.__name__ + return exception_class_name + async def log_success_fallback_event( self, original_model_group: str, kwargs: dict, original_exception: Exception ): @@ -1181,7 +1197,7 @@ class PrometheusLogger(CustomLogger): team=standard_metadata["user_api_key_team_id"], team_alias=standard_metadata["user_api_key_team_alias"], exception_status=str(getattr(original_exception, "status_code", None)), - exception_class=str(original_exception.__class__.__name__), + exception_class=self._get_exception_class_name(original_exception), tags=_tags, ) _labels = prometheus_label_factory( @@ -1225,7 +1241,7 @@ class PrometheusLogger(CustomLogger): team=standard_metadata["user_api_key_team_id"], team_alias=standard_metadata["user_api_key_team_alias"], exception_status=str(getattr(original_exception, "status_code", None)), - exception_class=str(original_exception.__class__.__name__), + exception_class=self._get_exception_class_name(original_exception), tags=_tags, ) @@ -1721,6 +1737,36 @@ class PrometheusLogger(CustomLogger): return (end_time - start_time).total_seconds() return None + @staticmethod + def _mount_metrics_endpoint(premium_user: bool): + """ + Mount the Prometheus metrics endpoint with optional authentication. + + Args: + premium_user (bool): Whether the user is a premium user + require_auth (bool, optional): Whether to require authentication for the metrics endpoint. + Defaults to False. + """ + from prometheus_client import make_asgi_app + + from litellm._logging import verbose_proxy_logger + from litellm.proxy._types import CommonProxyErrors + from litellm.proxy.proxy_server import app + + if premium_user is not True: + verbose_proxy_logger.warning( + f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}" + ) + + # Create metrics ASGI app + metrics_app = make_asgi_app() + + # Mount the metrics app to the app + app.mount("/metrics", metrics_app) + verbose_proxy_logger.debug( + "Starting Prometheus Metrics on /metrics (no authentication)" + ) + def prometheus_label_factory( supported_enum_labels: List[str], diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index 4bf293fb01..a5f2f0b5c7 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -3,11 +3,16 @@ # On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers) -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from litellm._logging import print_verbose, verbose_logger from litellm.types.integrations.prometheus import LATENCY_BUCKETS -from litellm.types.services import ServiceLoggerPayload, ServiceTypes +from litellm.types.services import ( + DEFAULT_SERVICE_CONFIGS, + ServiceLoggerPayload, + ServiceMetrics, + ServiceTypes, +) FAILED_REQUESTS_LABELS = ["error_class", "function_name"] @@ -23,7 +28,8 @@ class PrometheusServicesLogger: ): try: try: - from prometheus_client import REGISTRY, Counter, Histogram + from prometheus_client import REGISTRY, Counter, Gauge, Histogram + from prometheus_client.gc_collector import Collector except ImportError: raise Exception( "Missing prometheus_client. 
Run `pip install prometheus-client`" @@ -31,36 +37,51 @@ class PrometheusServicesLogger: self.Histogram = Histogram self.Counter = Counter + self.Gauge = Gauge self.REGISTRY = REGISTRY verbose_logger.debug("in init prometheus services metrics") - self.services = [item.value for item in ServiceTypes] + self.payload_to_prometheus_map: Dict[ + str, List[Union[Histogram, Counter, Gauge, Collector]] + ] = {} - self.payload_to_prometheus_map = ( - {} - ) # store the prometheus histogram/counter we need to call for each field in payload + for service in ServiceTypes: + service_metrics: List[Union[Histogram, Counter, Gauge, Collector]] = [] - for service in self.services: - histogram = self.create_histogram(service, type_of_request="latency") - counter_failed_request = self.create_counter( - service, - type_of_request="failed_requests", - additional_labels=FAILED_REQUESTS_LABELS, - ) - counter_total_requests = self.create_counter( - service, type_of_request="total_requests" - ) - self.payload_to_prometheus_map[service] = [ - histogram, - counter_failed_request, - counter_total_requests, - ] + metrics_to_initialize = self._get_service_metrics_initialize(service) - self.prometheus_to_amount_map: dict = ( - {} - ) # the field / value in ServiceLoggerPayload the object needs to be incremented by + # Initialize only the configured metrics for each service + if ServiceMetrics.HISTOGRAM in metrics_to_initialize: + histogram = self.create_histogram( + service.value, type_of_request="latency" + ) + if histogram: + service_metrics.append(histogram) + if ServiceMetrics.COUNTER in metrics_to_initialize: + counter_failed_request = self.create_counter( + service.value, + type_of_request="failed_requests", + additional_labels=FAILED_REQUESTS_LABELS, + ) + if counter_failed_request: + service_metrics.append(counter_failed_request) + counter_total_requests = self.create_counter( + service.value, type_of_request="total_requests" + ) + if counter_total_requests: + service_metrics.append(counter_total_requests) + + if ServiceMetrics.GAUGE in metrics_to_initialize: + gauge = self.create_gauge(service.value, type_of_request="size") + if gauge: + service_metrics.append(gauge) + + if service_metrics: + self.payload_to_prometheus_map[service.value] = service_metrics + + self.prometheus_to_amount_map: dict = {} ### MOCK TESTING ### self.mock_testing = mock_testing self.mock_testing_success_calls = 0 @@ -70,6 +91,19 @@ class PrometheusServicesLogger: print_verbose(f"Got exception on init prometheus client {str(e)}") raise e + def _get_service_metrics_initialize( + self, service: ServiceTypes + ) -> List[ServiceMetrics]: + DEFAULT_METRICS = [ServiceMetrics.COUNTER, ServiceMetrics.HISTOGRAM] + if service not in DEFAULT_SERVICE_CONFIGS: + return DEFAULT_METRICS + + metrics = DEFAULT_SERVICE_CONFIGS.get(service, {}).get("metrics", []) + if not metrics: + verbose_logger.debug(f"No metrics found for service {service}") + return DEFAULT_METRICS + return metrics + def is_metric_registered(self, metric_name) -> bool: for metric in self.REGISTRY.collect(): if metric_name == metric.name: @@ -94,6 +128,15 @@ class PrometheusServicesLogger: buckets=LATENCY_BUCKETS, ) + def create_gauge(self, service: str, type_of_request: str): + metric_name = "litellm_{}_{}".format(service, type_of_request) + is_registered = self.is_metric_registered(metric_name) + if is_registered: + return self._get_metric(metric_name) + return self.Gauge( + metric_name, "Gauge for {} service".format(service), labelnames=[service] + ) + def create_counter( self, 
service: str, @@ -120,6 +163,15 @@ class PrometheusServicesLogger: histogram.labels(labels).observe(amount) + def update_gauge( + self, + gauge, + labels: str, + amount: float, + ): + assert isinstance(gauge, self.Gauge) + gauge.labels(labels).set(amount) + def increment_counter( self, counter, @@ -190,6 +242,13 @@ class PrometheusServicesLogger: labels=payload.service.value, amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS ) + elif isinstance(obj, self.Gauge): + if payload.event_metadata: + self.update_gauge( + gauge=obj, + labels=payload.event_metadata.get("gauge_labels") or "", + amount=payload.event_metadata.get("gauge_value") or 0, + ) async def async_service_failure_hook( self, diff --git a/litellm/litellm_core_utils/credential_accessor.py b/litellm/litellm_core_utils/credential_accessor.py index d87dcc116b..45e1ea2c49 100644 --- a/litellm/litellm_core_utils/credential_accessor.py +++ b/litellm/litellm_core_utils/credential_accessor.py @@ -10,6 +10,7 @@ class CredentialAccessor: @staticmethod def get_credential_values(credential_name: str) -> dict: """Safe accessor for credentials.""" + if not litellm.credential_list: return {} for credential in litellm.credential_list: diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py index 037351d0e6..13103c85a0 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -3,6 +3,7 @@ from typing import Optional, Tuple import httpx import litellm +from litellm.constants import REPLICATE_MODEL_NAME_WITH_ID_LENGTH from litellm.secret_managers.main import get_secret, get_secret_str from ..types.router import LiteLLM_Params @@ -256,10 +257,13 @@ def get_llm_provider( # noqa: PLR0915 elif model in litellm.cohere_chat_models: custom_llm_provider = "cohere_chat" ## replicate - elif model in litellm.replicate_models or (":" in model and len(model) > 64): + elif model in litellm.replicate_models or ( + ":" in model and len(model) > REPLICATE_MODEL_NAME_WITH_ID_LENGTH + ): model_parts = model.split(":") if ( - len(model_parts) > 1 and len(model_parts[1]) == 64 + len(model_parts) > 1 + and len(model_parts[1]) == REPLICATE_MODEL_NAME_WITH_ID_LENGTH ): ## checks if model name has a 64 digit code - e.g. 
"meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3" custom_llm_provider = "replicate" elif model in litellm.replicate_models: diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py index ccbdb331fd..a832605b8e 100644 --- a/litellm/litellm_core_utils/get_supported_openai_params.py +++ b/litellm/litellm_core_utils/get_supported_openai_params.py @@ -120,7 +120,7 @@ def get_supported_openai_params( # noqa: PLR0915 elif custom_llm_provider == "replicate": return litellm.ReplicateConfig().get_supported_openai_params(model=model) elif custom_llm_provider == "huggingface": - return litellm.HuggingfaceConfig().get_supported_openai_params(model=model) + return litellm.HuggingFaceChatConfig().get_supported_openai_params(model=model) elif custom_llm_provider == "jina_ai": if request_type == "embeddings": return litellm.JinaAIEmbeddingConfig().get_supported_openai_params() diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 84825535c9..bf7ac1eb99 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -28,6 +28,10 @@ from litellm._logging import _is_debugging_on, verbose_logger from litellm.batches.batch_utils import _handle_completed_batch from litellm.caching.caching import DualCache, InMemoryCache from litellm.caching.caching_handler import LLMCachingHandler +from litellm.constants import ( + DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT, + DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT, +) from litellm.cost_calculator import _select_model_name_for_cost_calc from litellm.integrations.arize.arize import ArizeLogger from litellm.integrations.custom_guardrail import CustomGuardrail @@ -453,8 +457,12 @@ class Logging(LiteLLMLoggingBaseClass): non_default_params: dict, prompt_id: str, prompt_variables: Optional[dict], + prompt_management_logger: Optional[CustomLogger] = None, ) -> Tuple[str, List[AllMessageValues], dict]: - custom_logger = self.get_custom_logger_for_prompt_management(model) + custom_logger = ( + prompt_management_logger + or self.get_custom_logger_for_prompt_management(model) + ) if custom_logger: ( model, @@ -3745,9 +3753,12 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload: response_cost=response_cost, response_cost_failure_debug_info=None, status=str("success"), - total_tokens=int(30), - prompt_tokens=int(20), - completion_tokens=int(10), + total_tokens=int( + DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT + + DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT + ), + prompt_tokens=int(DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT), + completion_tokens=int(DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT), startTime=start_time, endTime=end_time, completionStartTime=completion_start_time, diff --git a/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py b/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py index 74d15e9a01..34c370ffca 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py +++ b/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py @@ -5,6 +5,7 @@ Helper utilities for tracking the cost of built-in tools. 
from typing import Any, Dict, List, Optional import litellm +from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS from litellm.types.llms.openai import FileSearchTool, WebSearchOptions from litellm.types.utils import ( ModelInfo, @@ -132,7 +133,7 @@ class StandardBuiltInToolCostTracking: """ if file_search is None: return 0.0 - return 2.5 / 1000 + return OPENAI_FILE_SEARCH_COST_PER_1K_CALLS @staticmethod def chat_completion_response_includes_annotations( diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py index f3f4ce6ef4..a0a99f580b 100644 --- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py +++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py @@ -9,6 +9,7 @@ from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union import litellm from litellm._logging import verbose_logger from litellm.constants import RESPONSE_FORMAT_TOOL_NAME +from litellm.types.llms.databricks import DatabricksTool from litellm.types.llms.openai import ChatCompletionThinkingBlock from litellm.types.utils import ( ChatCompletionDeltaToolCall, @@ -35,6 +36,25 @@ from litellm.types.utils import ( from .get_headers import get_response_headers +def convert_tool_call_to_json_mode( + tool_calls: List[ChatCompletionMessageToolCall], + convert_tool_call_to_json_mode: bool, +) -> Tuple[Optional[Message], Optional[str]]: + if _should_convert_tool_call_to_json_mode( + tool_calls=tool_calls, + convert_tool_call_to_json_mode=convert_tool_call_to_json_mode, + ): + # to support 'json_schema' logic on older models + json_mode_content_str: Optional[str] = tool_calls[0]["function"].get( + "arguments" + ) + if json_mode_content_str is not None: + message = litellm.Message(content=json_mode_content_str) + finish_reason = "stop" + return message, finish_reason + return None, None + + async def convert_to_streaming_response_async(response_object: Optional[dict] = None): """ Asynchronously converts a response object to a streaming response. 
@@ -335,21 +355,14 @@ class LiteLLMResponseObjectHandler: Only supported for HF TGI models """ transformed_logprobs: Optional[TextCompletionLogprobs] = None - if custom_llm_provider == "huggingface": - # only supported for TGI models - try: - raw_response = response._hidden_params.get("original_response", None) - transformed_logprobs = litellm.huggingface._transform_logprobs( - hf_response=raw_response - ) - except Exception as e: - verbose_logger.exception(f"LiteLLM non blocking exception: {e}") return transformed_logprobs def _should_convert_tool_call_to_json_mode( - tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None, + tool_calls: Optional[ + Union[List[ChatCompletionMessageToolCall], List[DatabricksTool]] + ] = None, convert_tool_call_to_json_mode: Optional[bool] = None, ) -> bool: """ diff --git a/litellm/litellm_core_utils/prompt_templates/common_utils.py b/litellm/litellm_core_utils/prompt_templates/common_utils.py index 4170d3c1e1..8d3845969a 100644 --- a/litellm/litellm_core_utils/prompt_templates/common_utils.py +++ b/litellm/litellm_core_utils/prompt_templates/common_utils.py @@ -7,6 +7,7 @@ from typing import Dict, List, Literal, Optional, Union, cast from litellm.types.llms.openai import ( AllMessageValues, ChatCompletionAssistantMessage, + ChatCompletionFileObject, ChatCompletionUserMessage, ) from litellm.types.utils import Choices, ModelResponse, StreamingChoices @@ -34,7 +35,7 @@ def handle_messages_with_content_list_to_str_conversion( def strip_name_from_messages( - messages: List[AllMessageValues], + messages: List[AllMessageValues], allowed_name_roles: List[str] = ["user"] ) -> List[AllMessageValues]: """ Removes 'name' from messages @@ -43,7 +44,7 @@ def strip_name_from_messages( for message in messages: msg_role = message.get("role") msg_copy = message.copy() - if msg_role == "user": + if msg_role not in allowed_name_roles: msg_copy.pop("name", None) # type: ignore new_messages.append(msg_copy) return new_messages @@ -292,3 +293,58 @@ def get_completion_messages( messages, assistant_continue_message, ensure_alternating_roles ) return messages + + +def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]: + """ + Gets file ids from messages + """ + file_ids = [] + for message in messages: + if message.get("role") == "user": + content = message.get("content") + if content: + if isinstance(content, str): + continue + for c in content: + if c["type"] == "file": + file_object = cast(ChatCompletionFileObject, c) + file_object_file_field = file_object["file"] + file_id = file_object_file_field.get("file_id") + if file_id: + file_ids.append(file_id) + return file_ids + + +def update_messages_with_model_file_ids( + messages: List[AllMessageValues], + model_id: str, + model_file_id_mapping: Dict[str, Dict[str, str]], +) -> List[AllMessageValues]: + """ + Updates messages with model file ids. 
+ + model_file_id_mapping: Dict[str, Dict[str, str]] = { + "litellm_proxy/file_id": { + "model_id": "provider_file_id" + } + } + """ + for message in messages: + if message.get("role") == "user": + content = message.get("content") + if content: + if isinstance(content, str): + continue + for c in content: + if c["type"] == "file": + file_object = cast(ChatCompletionFileObject, c) + file_object_file_field = file_object["file"] + file_id = file_object_file_field.get("file_id") + if file_id: + provider_file_id = ( + model_file_id_mapping.get(file_id, {}).get(model_id) + or file_id + ) + file_object_file_field["file_id"] = provider_file_id + return messages diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 0673307929..e8d8456ed7 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -1300,20 +1300,37 @@ def convert_to_anthropic_tool_invoke( ] } """ - anthropic_tool_invoke = [ - AnthropicMessagesToolUseParam( + anthropic_tool_invoke = [] + + for tool in tool_calls: + if not get_attribute_or_key(tool, "type") == "function": + continue + + _anthropic_tool_use_param = AnthropicMessagesToolUseParam( type="tool_use", - id=get_attribute_or_key(tool, "id"), - name=get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"), + id=cast(str, get_attribute_or_key(tool, "id")), + name=cast( + str, + get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"), + ), input=json.loads( get_attribute_or_key( get_attribute_or_key(tool, "function"), "arguments" ) ), ) - for tool in tool_calls - if get_attribute_or_key(tool, "type") == "function" - ] + + _content_element = add_cache_control_to_content( + anthropic_content_element=_anthropic_tool_use_param, + orignal_content_element=dict(tool), + ) + + if "cache_control" in _content_element: + _anthropic_tool_use_param["cache_control"] = _content_element[ + "cache_control" + ] + + anthropic_tool_invoke.append(_anthropic_tool_use_param) return anthropic_tool_invoke @@ -1324,6 +1341,7 @@ def add_cache_control_to_content( AnthropicMessagesImageParam, AnthropicMessagesTextParam, AnthropicMessagesDocumentParam, + AnthropicMessagesToolUseParam, ChatCompletionThinkingBlock, ], orignal_content_element: Union[dict, AllMessageValues], diff --git a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py index 1ca2bfe45e..abe5966d31 100644 --- a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py +++ b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py @@ -1,6 +1,6 @@ import base64 import time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, cast from litellm.types.llms.openai import ( ChatCompletionAssistantContentValue, @@ -9,7 +9,9 @@ from litellm.types.llms.openai import ( from litellm.types.utils import ( ChatCompletionAudioResponse, ChatCompletionMessageToolCall, + Choices, CompletionTokensDetails, + CompletionTokensDetailsWrapper, Function, FunctionCall, ModelResponse, @@ -203,14 +205,14 @@ class ChunkProcessor: ) def get_combined_content( - self, chunks: List[Dict[str, Any]] + self, chunks: List[Dict[str, Any]], delta_key: str = "content" ) -> ChatCompletionAssistantContentValue: content_list: List[str] = [] for chunk in chunks: choices = chunk["choices"] for choice in choices: delta = choice.get("delta", {}) - content = delta.get("content", "") + content = 
delta.get(delta_key, "") if content is None: continue # openai v1.0.0 sets content = None for chunks content_list.append(content) @@ -221,6 +223,11 @@ class ChunkProcessor: # Update the "content" field within the response dictionary return combined_content + def get_combined_reasoning_content( + self, chunks: List[Dict[str, Any]] + ) -> ChatCompletionAssistantContentValue: + return self.get_combined_content(chunks, delta_key="reasoning_content") + def get_combined_audio_content( self, chunks: List[Dict[str, Any]] ) -> ChatCompletionAudioResponse: @@ -296,12 +303,27 @@ class ChunkProcessor: "prompt_tokens_details": prompt_tokens_details, } + def count_reasoning_tokens(self, response: ModelResponse) -> int: + reasoning_tokens = 0 + for choice in response.choices: + if ( + hasattr(cast(Choices, choice).message, "reasoning_content") + and cast(Choices, choice).message.reasoning_content is not None + ): + reasoning_tokens += token_counter( + text=cast(Choices, choice).message.reasoning_content, + count_response_tokens=True, + ) + + return reasoning_tokens + def calculate_usage( self, chunks: List[Union[Dict[str, Any], ModelResponse]], model: str, completion_output: str, messages: Optional[List] = None, + reasoning_tokens: Optional[int] = None, ) -> Usage: """ Calculate usage for the given chunks. @@ -382,6 +404,19 @@ class ChunkProcessor: ) # for anthropic if completion_tokens_details is not None: returned_usage.completion_tokens_details = completion_tokens_details + + if reasoning_tokens is not None: + if returned_usage.completion_tokens_details is None: + returned_usage.completion_tokens_details = ( + CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens) + ) + elif ( + returned_usage.completion_tokens_details is not None + and returned_usage.completion_tokens_details.reasoning_tokens is None + ): + returned_usage.completion_tokens_details.reasoning_tokens = ( + reasoning_tokens + ) if prompt_tokens_details is not None: returned_usage.prompt_tokens_details = prompt_tokens_details diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index bc83ef5ac9..ec20a1ad4c 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -214,10 +214,7 @@ class CustomStreamWrapper: Output parse / special tokens for sagemaker + hf streaming. 
""" hold = False - if ( - self.custom_llm_provider != "huggingface" - and self.custom_llm_provider != "sagemaker" - ): + if self.custom_llm_provider != "sagemaker": return hold, chunk if finish_reason: @@ -290,49 +287,6 @@ class CustomStreamWrapper: except Exception as e: raise e - def handle_huggingface_chunk(self, chunk): - try: - if not isinstance(chunk, str): - chunk = chunk.decode( - "utf-8" - ) # DO NOT REMOVE this: This is required for HF inference API + Streaming - text = "" - is_finished = False - finish_reason = "" - print_verbose(f"chunk: {chunk}") - if chunk.startswith("data:"): - data_json = json.loads(chunk[5:]) - print_verbose(f"data json: {data_json}") - if "token" in data_json and "text" in data_json["token"]: - text = data_json["token"]["text"] - if data_json.get("details", False) and data_json["details"].get( - "finish_reason", False - ): - is_finished = True - finish_reason = data_json["details"]["finish_reason"] - elif data_json.get( - "generated_text", False - ): # if full generated text exists, then stream is complete - text = "" # don't return the final bos token - is_finished = True - finish_reason = "stop" - elif data_json.get("error", False): - raise Exception(data_json.get("error")) - return { - "text": text, - "is_finished": is_finished, - "finish_reason": finish_reason, - } - elif "error" in chunk: - raise ValueError(chunk) - return { - "text": text, - "is_finished": is_finished, - "finish_reason": finish_reason, - } - except Exception as e: - raise e - def handle_ai21_chunk(self, chunk): # fake streaming chunk = chunk.decode("utf-8") data_json = json.loads(chunk) @@ -1049,11 +1003,6 @@ class CustomStreamWrapper: completion_obj["content"] = response_obj["text"] if response_obj["is_finished"]: self.received_finish_reason = response_obj["finish_reason"] - elif self.custom_llm_provider and self.custom_llm_provider == "huggingface": - response_obj = self.handle_huggingface_chunk(chunk) - completion_obj["content"] = response_obj["text"] - if response_obj["is_finished"]: - self.received_finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "predibase": response_obj = self.handle_predibase_chunk(chunk) completion_obj["content"] = response_obj["text"] diff --git a/litellm/litellm_core_utils/token_counter.py b/litellm/litellm_core_utils/token_counter.py index e6bc65ccff..afd5ab5ff4 100644 --- a/litellm/litellm_core_utils/token_counter.py +++ b/litellm/litellm_core_utils/token_counter.py @@ -11,6 +11,10 @@ from litellm.constants import ( DEFAULT_IMAGE_HEIGHT, DEFAULT_IMAGE_TOKEN_COUNT, DEFAULT_IMAGE_WIDTH, + MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES, + MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES, + MAX_TILE_HEIGHT, + MAX_TILE_WIDTH, ) from litellm.llms.custom_httpx.http_handler import _get_httpx_client @@ -97,11 +101,14 @@ def resize_image_high_res( height: int, ) -> Tuple[int, int]: # Maximum dimensions for high res mode - max_short_side = 768 - max_long_side = 2000 + max_short_side = MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES + max_long_side = MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES # Return early if no resizing is needed - if width <= 768 and height <= 768: + if ( + width <= MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES + and height <= MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES + ): return width, height # Determine the longer and shorter sides @@ -132,7 +139,10 @@ def resize_image_high_res( # Test the function with the given example def calculate_tiles_needed( - resized_width, resized_height, tile_width=512, tile_height=512 + resized_width, + resized_height, + 
tile_width=MAX_TILE_WIDTH, + tile_height=MAX_TILE_HEIGHT, ): tiles_across = (resized_width + tile_width - 1) // tile_width tiles_down = (resized_height + tile_height - 1) // tile_height diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index 7625292e6e..c29a98b217 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -21,7 +21,6 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, ) from litellm.types.llms.anthropic import ( - AnthropicChatCompletionUsageBlock, ContentBlockDelta, ContentBlockStart, ContentBlockStop, @@ -32,13 +31,13 @@ from litellm.types.llms.anthropic import ( from litellm.types.llms.openai import ( ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, - ChatCompletionUsageBlock, ) from litellm.types.utils import ( Delta, GenericStreamingChunk, ModelResponseStream, StreamingChoices, + Usage, ) from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager @@ -487,10 +486,8 @@ class ModelResponseIterator: return True return False - def _handle_usage( - self, anthropic_usage_chunk: Union[dict, UsageDelta] - ) -> AnthropicChatCompletionUsageBlock: - usage_block = AnthropicChatCompletionUsageBlock( + def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage: + usage_block = Usage( prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0), completion_tokens=anthropic_usage_chunk.get("output_tokens", 0), total_tokens=anthropic_usage_chunk.get("input_tokens", 0) @@ -581,7 +578,7 @@ class ModelResponseIterator: text = "" tool_use: Optional[ChatCompletionToolCallChunk] = None finish_reason = "" - usage: Optional[ChatCompletionUsageBlock] = None + usage: Optional[Usage] = None provider_specific_fields: Dict[str, Any] = {} reasoning_content: Optional[str] = None thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 09096c89e7..8a2048f95a 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -5,7 +5,10 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast import httpx import litellm -from litellm.constants import RESPONSE_FORMAT_TOOL_NAME +from litellm.constants import ( + DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS, + RESPONSE_FORMAT_TOOL_NAME, +) from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt from litellm.llms.base_llm.base_utils import type_to_response_format_param @@ -30,9 +33,16 @@ from litellm.types.llms.openai import ( ChatCompletionToolCallFunctionChunk, ChatCompletionToolParam, ) +from litellm.types.utils import CompletionTokensDetailsWrapper from litellm.types.utils import Message as LitellmMessage from litellm.types.utils import PromptTokensDetailsWrapper -from litellm.utils import ModelResponse, Usage, add_dummy_tool, has_tool_call_blocks +from litellm.utils import ( + ModelResponse, + Usage, + add_dummy_tool, + has_tool_call_blocks, + token_counter, +) from ..common_utils import AnthropicError, process_anthropic_headers @@ -53,7 +63,7 @@ class AnthropicConfig(BaseConfig): max_tokens: Optional[ int - ] = 4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default) + ] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS # anthropic requires a default value (Opus, Sonnet, and Haiku have the 
same default) stop_sequences: Optional[list] = None temperature: Optional[int] = None top_p: Optional[int] = None @@ -65,7 +75,7 @@ class AnthropicConfig(BaseConfig): self, max_tokens: Optional[ int - ] = 4096, # You can pass in a value yourself or use the default value 4096 + ] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS, # You can pass in a value yourself or use the default value 4096 stop_sequences: Optional[list] = None, temperature: Optional[int] = None, top_p: Optional[int] = None, @@ -309,6 +319,33 @@ class AnthropicConfig(BaseConfig): else: raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}") + def map_response_format_to_anthropic_tool( + self, value: Optional[dict], optional_params: dict, is_thinking_enabled: bool + ) -> Optional[AnthropicMessagesTool]: + ignore_response_format_types = ["text"] + if ( + value is None or value["type"] in ignore_response_format_types + ): # value is a no-op + return None + + json_schema: Optional[dict] = None + if "response_schema" in value: + json_schema = value["response_schema"] + elif "json_schema" in value: + json_schema = value["json_schema"]["schema"] + """ + When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode + - You usually want to provide a single tool + - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool + - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective. + """ + + _tool = self._create_json_tool_call_for_response_format( + json_schema=json_schema, + ) + + return _tool + def map_openai_params( self, non_default_params: dict, @@ -352,34 +389,18 @@ class AnthropicConfig(BaseConfig): if param == "top_p": optional_params["top_p"] = value if param == "response_format" and isinstance(value, dict): - ignore_response_format_types = ["text"] - if value["type"] in ignore_response_format_types: # value is a no-op + _tool = self.map_response_format_to_anthropic_tool( + value, optional_params, is_thinking_enabled + ) + if _tool is None: continue - - json_schema: Optional[dict] = None - if "response_schema" in value: - json_schema = value["response_schema"] - elif "json_schema" in value: - json_schema = value["json_schema"]["schema"] - """ - When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode - - You usually want to provide a single tool - - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool - - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective. 
- """ - if not is_thinking_enabled: _tool_choice = {"name": RESPONSE_FORMAT_TOOL_NAME, "type": "tool"} optional_params["tool_choice"] = _tool_choice - - _tool = self._create_json_tool_call_for_response_format( - json_schema=json_schema, - ) + optional_params["json_mode"] = True optional_params = self._add_tools_to_optional_params( optional_params=optional_params, tools=[_tool] ) - - optional_params["json_mode"] = True if param == "user": optional_params["metadata"] = {"user_id": value} if param == "thinking": @@ -769,6 +790,15 @@ class AnthropicConfig(BaseConfig): prompt_tokens_details = PromptTokensDetailsWrapper( cached_tokens=cache_read_input_tokens ) + completion_token_details = ( + CompletionTokensDetailsWrapper( + reasoning_tokens=token_counter( + text=reasoning_content, count_response_tokens=True + ) + ) + if reasoning_content + else None + ) total_tokens = prompt_tokens + completion_tokens usage = Usage( prompt_tokens=prompt_tokens, @@ -777,6 +807,7 @@ class AnthropicConfig(BaseConfig): prompt_tokens_details=prompt_tokens_details, cache_creation_input_tokens=cache_creation_input_tokens, cache_read_input_tokens=cache_read_input_tokens, + completion_tokens_details=completion_token_details, ) setattr(model_response, "usage", usage) # type: ignore diff --git a/litellm/llms/anthropic/completion/transformation.py b/litellm/llms/anthropic/completion/transformation.py index 5cbc0b5fd8..e4e04df4d6 100644 --- a/litellm/llms/anthropic/completion/transformation.py +++ b/litellm/llms/anthropic/completion/transformation.py @@ -11,6 +11,7 @@ from typing import AsyncIterator, Dict, Iterator, List, Optional, Union import httpx import litellm +from litellm.constants import DEFAULT_MAX_TOKENS from litellm.litellm_core_utils.prompt_templates.factory import ( custom_prompt, prompt_factory, @@ -65,7 +66,9 @@ class AnthropicTextConfig(BaseConfig): def __init__( self, - max_tokens_to_sample: Optional[int] = 256, # anthropic requires a default + max_tokens_to_sample: Optional[ + int + ] = DEFAULT_MAX_TOKENS, # anthropic requires a default stop_sequences: Optional[list] = None, temperature: Optional[int] = None, top_p: Optional[int] = None, diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py index aed813fdab..bb60680ebc 100644 --- a/litellm/llms/azure/azure.py +++ b/litellm/llms/azure/azure.py @@ -7,7 +7,7 @@ import httpx # type: ignore from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI import litellm -from litellm.constants import DEFAULT_MAX_RETRIES +from litellm.constants import AZURE_OPERATION_POLLING_TIMEOUT, DEFAULT_MAX_RETRIES from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.logging_utils import track_llm_api_timing from litellm.llms.custom_httpx.http_handler import ( @@ -857,7 +857,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM): await response.aread() - timeout_secs: int = 120 + timeout_secs: int = AZURE_OPERATION_POLLING_TIMEOUT start_time = time.time() if "status" not in response.json(): raise Exception( @@ -955,7 +955,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM): response.read() - timeout_secs: int = 120 + timeout_secs: int = AZURE_OPERATION_POLLING_TIMEOUT start_time = time.time() if "status" not in response.json(): raise Exception( diff --git a/litellm/llms/azure/chat/gpt_transformation.py b/litellm/llms/azure/chat/gpt_transformation.py index ee85517e66..e30d68f97d 100644 --- a/litellm/llms/azure/chat/gpt_transformation.py +++ b/litellm/llms/azure/chat/gpt_transformation.py @@ 
-7,6 +7,10 @@ from litellm.litellm_core_utils.prompt_templates.factory import ( convert_to_azure_openai_messages, ) from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.types.llms.azure import ( + API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT, + API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT, +) from litellm.types.utils import ModelResponse from litellm.utils import supports_response_schema @@ -123,7 +127,10 @@ class AzureOpenAIConfig(BaseConfig): - check if api_version is supported for response_format """ - is_supported = int(api_version_year) <= 2024 and int(api_version_month) >= 8 + is_supported = ( + int(api_version_year) <= API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT + and int(api_version_month) >= API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT + ) return is_supported diff --git a/litellm/llms/azure/chat/o_series_transformation.py b/litellm/llms/azure/chat/o_series_transformation.py index 0ca3a28d23..21aafce7fb 100644 --- a/litellm/llms/azure/chat/o_series_transformation.py +++ b/litellm/llms/azure/chat/o_series_transformation.py @@ -14,6 +14,7 @@ Translations handled by LiteLLM: from typing import List, Optional +import litellm from litellm import verbose_logger from litellm.types.llms.openai import AllMessageValues from litellm.utils import get_model_info @@ -22,6 +23,27 @@ from ...openai.chat.o_series_transformation import OpenAIOSeriesConfig class AzureOpenAIO1Config(OpenAIOSeriesConfig): + def get_supported_openai_params(self, model: str) -> list: + """ + Get the supported OpenAI params for the Azure O-Series models + """ + all_openai_params = litellm.OpenAIGPTConfig().get_supported_openai_params( + model=model + ) + non_supported_params = [ + "logprobs", + "top_p", + "presence_penalty", + "frequency_penalty", + "top_logprobs", + ] + + o_series_only_param = ["reasoning_effort"] + all_openai_params.extend(o_series_only_param) + return [ + param for param in all_openai_params if param not in non_supported_params + ] + def should_fake_stream( self, model: Optional[str], diff --git a/litellm/llms/azure/files/handler.py b/litellm/llms/azure/files/handler.py index 5e105374b2..50c122ccf2 100644 --- a/litellm/llms/azure/files/handler.py +++ b/litellm/llms/azure/files/handler.py @@ -28,11 +28,11 @@ class AzureOpenAIFilesAPI(BaseAzureLLM): self, create_file_data: CreateFileRequest, openai_client: AsyncAzureOpenAI, - ) -> FileObject: + ) -> OpenAIFileObject: verbose_logger.debug("create_file_data=%s", create_file_data) response = await openai_client.files.create(**create_file_data) verbose_logger.debug("create_file_response=%s", response) - return response + return OpenAIFileObject(**response.model_dump()) def create_file( self, @@ -66,7 +66,7 @@ class AzureOpenAIFilesAPI(BaseAzureLLM): raise ValueError( "AzureOpenAI client is not an instance of AsyncAzureOpenAI. Make sure you passed an AsyncAzureOpenAI client." 
) - return self.acreate_file( # type: ignore + return self.acreate_file( create_file_data=create_file_data, openai_client=openai_client ) response = cast(AzureOpenAI, openai_client).files.create(**create_file_data) diff --git a/litellm/llms/base_llm/base_model_iterator.py b/litellm/llms/base_llm/base_model_iterator.py index 67b1466c2a..90dcc52fef 100644 --- a/litellm/llms/base_llm/base_model_iterator.py +++ b/litellm/llms/base_llm/base_model_iterator.py @@ -2,6 +2,7 @@ import json from abc import abstractmethod from typing import Optional, Union +import litellm from litellm.types.utils import GenericStreamingChunk, ModelResponseStream @@ -33,6 +34,18 @@ class BaseModelResponseIterator: self, str_line: str ) -> Union[GenericStreamingChunk, ModelResponseStream]: # chunk is a str at this point + + stripped_chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk( + str_line + ) + try: + if stripped_chunk is not None: + stripped_json_chunk: Optional[dict] = json.loads(stripped_chunk) + else: + stripped_json_chunk = None + except json.JSONDecodeError: + stripped_json_chunk = None + if "[DONE]" in str_line: return GenericStreamingChunk( text="", @@ -42,9 +55,8 @@ class BaseModelResponseIterator: index=0, tool_use=None, ) - elif str_line.startswith("data:"): - data_json = json.loads(str_line[5:]) - return self.chunk_parser(chunk=data_json) + elif stripped_json_chunk: + return self.chunk_parser(chunk=stripped_json_chunk) else: return GenericStreamingChunk( text="", @@ -85,6 +97,7 @@ class BaseModelResponseIterator: async def __anext__(self): try: chunk = await self.async_response_iterator.__anext__() + except StopAsyncIteration: raise StopAsyncIteration except ValueError as e: @@ -99,7 +112,9 @@ class BaseModelResponseIterator: str_line = str_line[index:] # chunk is a str at this point - return self._handle_string_chunk(str_line=str_line) + chunk = self._handle_string_chunk(str_line=str_line) + + return chunk except StopAsyncIteration: raise StopAsyncIteration except ValueError as e: diff --git a/litellm/llms/base_llm/base_utils.py b/litellm/llms/base_llm/base_utils.py index cef64d01e3..5b175f4756 100644 --- a/litellm/llms/base_llm/base_utils.py +++ b/litellm/llms/base_llm/base_utils.py @@ -3,6 +3,7 @@ Utility functions for base LLM classes. 
""" import copy +import json from abc import ABC, abstractmethod from typing import List, Optional, Type, Union @@ -10,8 +11,8 @@ from openai.lib import _parsing, _pydantic from pydantic import BaseModel from litellm._logging import verbose_logger -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import ProviderSpecificModelInfo +from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolCallChunk +from litellm.types.utils import Message, ProviderSpecificModelInfo class BaseLLMModelInfo(ABC): @@ -55,6 +56,32 @@ class BaseLLMModelInfo(ABC): pass +def _convert_tool_response_to_message( + tool_calls: List[ChatCompletionToolCallChunk], +) -> Optional[Message]: + """ + In JSON mode, Anthropic API returns JSON schema as a tool call, we need to convert it to a message to follow the OpenAI format + + """ + ## HANDLE JSON MODE - anthropic returns single function call + json_mode_content_str: Optional[str] = tool_calls[0]["function"].get("arguments") + try: + if json_mode_content_str is not None: + args = json.loads(json_mode_content_str) + if isinstance(args, dict) and (values := args.get("values")) is not None: + _message = Message(content=json.dumps(values)) + return _message + else: + # a lot of the times the `values` key is not present in the tool response + # relevant issue: https://github.com/BerriAI/litellm/issues/6741 + _message = Message(content=json.dumps(args)) + return _message + except json.JSONDecodeError: + # json decode error does occur, return the original tool response str + return Message(content=json_mode_content_str) + return None + + def _dict_to_response_format_helper( response_format: dict, ref_template: Optional[str] = None ) -> dict: diff --git a/litellm/llms/bedrock/base_aws_llm.py b/litellm/llms/bedrock/base_aws_llm.py index 5482d80687..133ef6a952 100644 --- a/litellm/llms/bedrock/base_aws_llm.py +++ b/litellm/llms/bedrock/base_aws_llm.py @@ -9,7 +9,7 @@ from pydantic import BaseModel from litellm._logging import verbose_logger from litellm.caching.caching import DualCache -from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL +from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL, BEDROCK_MAX_POLICY_SIZE from litellm.litellm_core_utils.dd_tracing import tracer from litellm.secret_managers.main import get_secret @@ -381,7 +381,7 @@ class BaseAWSLLM: "region_name": aws_region_name, } - if sts_response["PackedPolicySize"] > 75: + if sts_response["PackedPolicySize"] > BEDROCK_MAX_POLICY_SIZE: verbose_logger.warning( f"The policy size is greater than 75% of the allowed size, PackedPolicySize: {sts_response['PackedPolicySize']}" ) diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 16693004e4..66324e840e 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -368,6 +368,7 @@ class BaseLLMHTTPHandler: else None ), litellm_params=litellm_params, + json_mode=json_mode, ) return CustomStreamWrapper( completion_stream=completion_stream, @@ -420,6 +421,7 @@ class BaseLLMHTTPHandler: timeout: Union[float, httpx.Timeout], fake_stream: bool = False, client: Optional[HTTPHandler] = None, + json_mode: bool = False, ) -> Tuple[Any, dict]: if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( @@ -447,11 +449,15 @@ class BaseLLMHTTPHandler: if fake_stream is True: completion_stream = provider_config.get_model_response_iterator( - streaming_response=response.json(), 
sync_stream=True + streaming_response=response.json(), + sync_stream=True, + json_mode=json_mode, ) else: completion_stream = provider_config.get_model_response_iterator( - streaming_response=response.iter_lines(), sync_stream=True + streaming_response=response.iter_lines(), + sync_stream=True, + json_mode=json_mode, ) # LOGGING diff --git a/litellm/llms/databricks/chat/handler.py b/litellm/llms/databricks/chat/handler.py deleted file mode 100644 index abb714746c..0000000000 --- a/litellm/llms/databricks/chat/handler.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Handles the chat completion request for Databricks -""" - -from typing import Callable, List, Optional, Union, cast - -from httpx._config import Timeout - -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import CustomStreamingDecoder -from litellm.utils import ModelResponse - -from ...openai_like.chat.handler import OpenAILikeChatHandler -from ..common_utils import DatabricksBase -from .transformation import DatabricksConfig - - -class DatabricksChatCompletion(OpenAILikeChatHandler, DatabricksBase): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def completion( - self, - *, - model: str, - messages: list, - api_base: str, - custom_llm_provider: str, - custom_prompt_dict: dict, - model_response: ModelResponse, - print_verbose: Callable, - encoding, - api_key: Optional[str], - logging_obj, - optional_params: dict, - acompletion=None, - litellm_params=None, - logger_fn=None, - headers: Optional[dict] = None, - timeout: Optional[Union[float, Timeout]] = None, - client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, - custom_endpoint: Optional[bool] = None, - streaming_decoder: Optional[CustomStreamingDecoder] = None, - fake_stream: bool = False, - ): - messages = DatabricksConfig()._transform_messages( - messages=cast(List[AllMessageValues], messages), model=model - ) - api_base, headers = self.databricks_validate_environment( - api_base=api_base, - api_key=api_key, - endpoint_type="chat_completions", - custom_endpoint=custom_endpoint, - headers=headers, - ) - - if optional_params.get("stream") is True: - fake_stream = DatabricksConfig()._should_fake_stream(optional_params) - else: - fake_stream = False - - return super().completion( - model=model, - messages=messages, - api_base=api_base, - custom_llm_provider=custom_llm_provider, - custom_prompt_dict=custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - encoding=encoding, - api_key=api_key, - logging_obj=logging_obj, - optional_params=optional_params, - acompletion=acompletion, - litellm_params=litellm_params, - logger_fn=logger_fn, - headers=headers, - timeout=timeout, - client=client, - custom_endpoint=True, - streaming_decoder=streaming_decoder, - fake_stream=fake_stream, - ) diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 94e0203459..09c87a9168 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -2,21 +2,68 @@ Translates from OpenAI's `/v1/chat/completions` to Databricks' `/chat/completions` """ -from typing import List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + AsyncIterator, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) +import httpx from pydantic import BaseModel +from litellm.constants import RESPONSE_FORMAT_TOOL_NAME +from 
litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import ( + _handle_invalid_parallel_tool_calls, + _should_convert_tool_call_to_json_mode, +) from litellm.litellm_core_utils.prompt_templates.common_utils import ( handle_messages_with_content_list_to_str_conversion, strip_name_from_messages, ) -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import ProviderField +from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator +from litellm.types.llms.anthropic import AnthropicMessagesTool +from litellm.types.llms.databricks import ( + AllDatabricksContentValues, + DatabricksChoice, + DatabricksFunction, + DatabricksResponse, + DatabricksTool, +) +from litellm.types.llms.openai import ( + AllMessageValues, + ChatCompletionThinkingBlock, + ChatCompletionToolChoiceFunctionParam, + ChatCompletionToolChoiceObjectParam, +) +from litellm.types.utils import ( + ChatCompletionMessageToolCall, + Choices, + Message, + ModelResponse, + ModelResponseStream, + ProviderField, + Usage, +) +from ...anthropic.chat.transformation import AnthropicConfig from ...openai_like.chat.transformation import OpenAILikeChatConfig +from ..common_utils import DatabricksBase, DatabricksException + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any -class DatabricksConfig(OpenAILikeChatConfig): +class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): """ Reference: https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#chat-request """ @@ -63,6 +110,39 @@ class DatabricksConfig(OpenAILikeChatConfig): ), ] + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + api_base, headers = self.databricks_validate_environment( + api_base=api_base, + api_key=api_key, + endpoint_type="chat_completions", + custom_endpoint=False, + headers=headers, + ) + # Ensure Content-Type header is set + headers["Content-Type"] = "application/json" + return headers + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + api_base = self._get_api_base(api_base) + complete_url = f"{api_base}/chat/completions" + return complete_url + def get_supported_openai_params(self, model: Optional[str] = None) -> list: return [ "stream", @@ -75,8 +155,98 @@ class DatabricksConfig(OpenAILikeChatConfig): "response_format", "tools", "tool_choice", + "reasoning_effort", + "thinking", ] + def convert_anthropic_tool_to_databricks_tool( + self, tool: Optional[AnthropicMessagesTool] + ) -> Optional[DatabricksTool]: + if tool is None: + return None + + return DatabricksTool( + type="function", + function=DatabricksFunction( + name=tool["name"], + parameters=cast(dict, tool.get("input_schema") or {}), + ), + ) + + def map_response_format_to_databricks_tool( + self, + model: str, + value: Optional[dict], + optional_params: dict, + is_thinking_enabled: bool, + ) -> Optional[DatabricksTool]: + if value is None: + return None + + tool = self.map_response_format_to_anthropic_tool( + value, optional_params, is_thinking_enabled + ) + + databricks_tool = self.convert_anthropic_tool_to_databricks_tool(tool) + return databricks_tool + + def 
map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + replace_max_completion_tokens_with_max_tokens: bool = True, + ) -> dict: + is_thinking_enabled = self.is_thinking_enabled(non_default_params) + mapped_params = super().map_openai_params( + non_default_params, optional_params, model, drop_params + ) + if ( + "max_completion_tokens" in non_default_params + and replace_max_completion_tokens_with_max_tokens + ): + mapped_params["max_tokens"] = non_default_params[ + "max_completion_tokens" + ] # most openai-compatible providers support 'max_tokens' not 'max_completion_tokens' + mapped_params.pop("max_completion_tokens", None) + + if "response_format" in non_default_params and "claude" in model: + _tool = self.map_response_format_to_databricks_tool( + model, + non_default_params["response_format"], + mapped_params, + is_thinking_enabled, + ) + + if _tool is not None: + self._add_tools_to_optional_params( + optional_params=optional_params, tools=[_tool] + ) + optional_params["json_mode"] = True + if not is_thinking_enabled: + _tool_choice = ChatCompletionToolChoiceObjectParam( + type="function", + function=ChatCompletionToolChoiceFunctionParam( + name=RESPONSE_FORMAT_TOOL_NAME + ), + ) + optional_params["tool_choice"] = _tool_choice + optional_params.pop( + "response_format", None + ) # unsupported for claude models - if json_schema -> convert to tool call + + if "reasoning_effort" in non_default_params and "claude" in model: + optional_params["thinking"] = AnthropicConfig._map_reasoning_effort( + non_default_params.get("reasoning_effort") + ) + ## handle thinking tokens + self.update_optional_params_with_thinking_tokens( + non_default_params=non_default_params, optional_params=mapped_params + ) + + return mapped_params + def _should_fake_stream(self, optional_params: dict) -> bool: """ Databricks doesn't support 'response_format' while streaming @@ -104,3 +274,259 @@ class DatabricksConfig(OpenAILikeChatConfig): new_messages = handle_messages_with_content_list_to_str_conversion(new_messages) new_messages = strip_name_from_messages(new_messages) return super()._transform_messages(messages=new_messages, model=model) + + @staticmethod + def extract_content_str( + content: Optional[AllDatabricksContentValues], + ) -> Optional[str]: + if content is None: + return None + if isinstance(content, str): + return content + elif isinstance(content, list): + content_str = "" + for item in content: + if item["type"] == "text": + content_str += item["text"] + return content_str + else: + raise Exception(f"Unsupported content type: {type(content)}") + + @staticmethod + def extract_reasoning_content( + content: Optional[AllDatabricksContentValues], + ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]: + """ + Extract and return the reasoning content and thinking blocks + """ + if content is None: + return None, None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + reasoning_content: Optional[str] = None + if isinstance(content, list): + for item in content: + if item["type"] == "reasoning": + for sum in item["summary"]: + if reasoning_content is None: + reasoning_content = "" + reasoning_content += sum["text"] + thinking_block = ChatCompletionThinkingBlock( + type="thinking", + thinking=sum["text"], + signature=sum["signature"], + ) + if thinking_blocks is None: + thinking_blocks = [] + thinking_blocks.append(thinking_block) + return reasoning_content, thinking_blocks + + def _transform_choices( + self, 
choices: List[DatabricksChoice], json_mode: Optional[bool] = None + ) -> List[Choices]: + transformed_choices = [] + + for choice in choices: + ## HANDLE JSON MODE - anthropic returns single function call] + tool_calls = choice["message"].get("tool_calls", None) + if tool_calls is not None: + _openai_tool_calls = [] + for _tc in tool_calls: + _openai_tc = ChatCompletionMessageToolCall(**_tc) # type: ignore + _openai_tool_calls.append(_openai_tc) + fixed_tool_calls = _handle_invalid_parallel_tool_calls( + _openai_tool_calls + ) + + if fixed_tool_calls is not None: + tool_calls = fixed_tool_calls + + translated_message: Optional[Message] = None + finish_reason: Optional[str] = None + if tool_calls and _should_convert_tool_call_to_json_mode( + tool_calls=tool_calls, + convert_tool_call_to_json_mode=json_mode, + ): + # to support response_format on claude models + json_mode_content_str: Optional[str] = ( + str(tool_calls[0]["function"].get("arguments", "")) or None + ) + if json_mode_content_str is not None: + translated_message = Message(content=json_mode_content_str) + finish_reason = "stop" + + if translated_message is None: + ## get the content str + content_str = DatabricksConfig.extract_content_str( + choice["message"]["content"] + ) + + ## get the reasoning content + ( + reasoning_content, + thinking_blocks, + ) = DatabricksConfig.extract_reasoning_content( + choice["message"].get("content") + ) + + translated_message = Message( + role="assistant", + content=content_str, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks, + tool_calls=choice["message"].get("tool_calls"), + ) + + if finish_reason is None: + finish_reason = choice["finish_reason"] + + translated_choice = Choices( + finish_reason=finish_reason, + index=choice["index"], + message=translated_message, + logprobs=None, + enhancements=None, + ) + + transformed_choices.append(translated_choice) + + return transformed_choices + + def transform_response( + self, + model: str, + raw_response: httpx.Response, + model_response: ModelResponse, + logging_obj: LiteLLMLoggingObj, + request_data: dict, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + encoding: Any, + api_key: Optional[str] = None, + json_mode: Optional[bool] = None, + ) -> ModelResponse: + ## LOGGING + logging_obj.post_call( + input=messages, + api_key=api_key, + original_response=raw_response.text, + additional_args={"complete_input_dict": request_data}, + ) + + ## RESPONSE OBJECT + try: + completion_response = DatabricksResponse(**raw_response.json()) # type: ignore + except Exception as e: + response_headers = getattr(raw_response, "headers", None) + raise DatabricksException( + message="Unable to get json response - {}, Original Response: {}".format( + str(e), raw_response.text + ), + status_code=raw_response.status_code, + headers=response_headers, + ) + + model_response.model = completion_response["model"] + model_response.id = completion_response["id"] + model_response.created = completion_response["created"] + setattr(model_response, "usage", Usage(**completion_response["usage"])) + + model_response.choices = self._transform_choices( # type: ignore + choices=completion_response["choices"], + json_mode=json_mode, + ) + + return model_response + + def get_model_response_iterator( + self, + streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], + sync_stream: bool, + json_mode: Optional[bool] = False, + ): + return DatabricksChatResponseIterator( + streaming_response=streaming_response, 
+ sync_stream=sync_stream, + json_mode=json_mode, + ) + + +class DatabricksChatResponseIterator(BaseModelResponseIterator): + def __init__( + self, + streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], + sync_stream: bool, + json_mode: Optional[bool] = False, + ): + super().__init__(streaming_response, sync_stream) + + self.json_mode = json_mode + self._last_function_name = None # Track the last seen function name + + def chunk_parser(self, chunk: dict) -> ModelResponseStream: + try: + translated_choices = [] + for choice in chunk["choices"]: + tool_calls = choice["delta"].get("tool_calls") + if tool_calls and self.json_mode: + # 1. Check if the function name is set and == RESPONSE_FORMAT_TOOL_NAME + # 2. If no function name, just args -> check last function name (saved via state variable) + # 3. Convert args to json + # 4. Convert json to message + # 5. Set content to message.content + # 6. Set tool_calls to None + from litellm.constants import RESPONSE_FORMAT_TOOL_NAME + from litellm.llms.base_llm.base_utils import ( + _convert_tool_response_to_message, + ) + + # Check if this chunk has a function name + function_name = tool_calls[0].get("function", {}).get("name") + if function_name is not None: + self._last_function_name = function_name + + # If we have a saved function name that matches RESPONSE_FORMAT_TOOL_NAME + # or this chunk has the matching function name + if ( + self._last_function_name == RESPONSE_FORMAT_TOOL_NAME + or function_name == RESPONSE_FORMAT_TOOL_NAME + ): + # Convert tool calls to message format + message = _convert_tool_response_to_message(tool_calls) + if message is not None: + if message.content == "{}": # empty json + message.content = "" + choice["delta"]["content"] = message.content + choice["delta"]["tool_calls"] = None + + # extract the content str + content_str = DatabricksConfig.extract_content_str( + choice["delta"].get("content") + ) + + # extract the reasoning content + ( + reasoning_content, + thinking_blocks, + ) = DatabricksConfig.extract_reasoning_content( + choice["delta"]["content"] + ) + + choice["delta"]["content"] = content_str + choice["delta"]["reasoning_content"] = reasoning_content + choice["delta"]["thinking_blocks"] = thinking_blocks + translated_choices.append(choice) + return ModelResponseStream( + id=chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + model=chunk["model"], + choices=translated_choices, + ) + except KeyError as e: + raise DatabricksException( + message=f"KeyError: {e}, Got unexpected response from Databricks: {chunk}", + status_code=400, + ) + except Exception as e: + raise e diff --git a/litellm/llms/databricks/common_utils.py b/litellm/llms/databricks/common_utils.py index 76bd281d4d..eab9e2f825 100644 --- a/litellm/llms/databricks/common_utils.py +++ b/litellm/llms/databricks/common_utils.py @@ -1,9 +1,35 @@ from typing import Literal, Optional, Tuple -from .exceptions import DatabricksError +from litellm.llms.base_llm.chat.transformation import BaseLLMException + + +class DatabricksException(BaseLLMException): + pass class DatabricksBase: + def _get_api_base(self, api_base: Optional[str]) -> str: + if api_base is None: + try: + from databricks.sdk import WorkspaceClient + + databricks_client = WorkspaceClient() + + api_base = ( + api_base or f"{databricks_client.config.host}/serving-endpoints" + ) + + return api_base + except ImportError: + raise DatabricksException( + status_code=400, + message=( + "Either set the DATABRICKS_API_BASE and DATABRICKS_API_KEY 
environment variables, " + "or install the databricks-sdk Python library." + ), + ) + return api_base + def _get_databricks_credentials( self, api_key: Optional[str], api_base: Optional[str], headers: Optional[dict] ) -> Tuple[str, dict]: @@ -23,7 +49,7 @@ class DatabricksBase: return api_base, headers except ImportError: - raise DatabricksError( + raise DatabricksException( status_code=400, message=( "If the Databricks base URL and API key are not set, the databricks-sdk " @@ -41,9 +67,9 @@ class DatabricksBase: custom_endpoint: Optional[bool], headers: Optional[dict], ) -> Tuple[str, dict]: - if api_key is None and headers is None: + if api_key is None and not headers: # handle empty headers if custom_endpoint is not None: - raise DatabricksError( + raise DatabricksException( status_code=400, message="Missing API Key - A call is being made to LLM Provider but no key is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params", ) @@ -54,7 +80,7 @@ class DatabricksBase: if api_base is None: if custom_endpoint: - raise DatabricksError( + raise DatabricksException( status_code=400, message="Missing API Base - A call is being made to LLM Provider but no api base is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params", ) diff --git a/litellm/llms/databricks/exceptions.py b/litellm/llms/databricks/exceptions.py deleted file mode 100644 index 8bb3d435d0..0000000000 --- a/litellm/llms/databricks/exceptions.py +++ /dev/null @@ -1,12 +0,0 @@ -import httpx - - -class DatabricksError(Exception): - def __init__(self, status_code, message): - self.status_code = status_code - self.message = message - self.request = httpx.Request(method="POST", url="https://docs.databricks.com/") - self.response = httpx.Response(status_code=status_code, request=self.request) - super().__init__( - self.message - ) # Call the base class constructor with the parameters it needs diff --git a/litellm/llms/deepinfra/chat/transformation.py b/litellm/llms/deepinfra/chat/transformation.py index 429759fad1..0d446d39b9 100644 --- a/litellm/llms/deepinfra/chat/transformation.py +++ b/litellm/llms/deepinfra/chat/transformation.py @@ -1,6 +1,7 @@ from typing import Optional, Tuple, Union import litellm +from litellm.constants import MIN_NON_ZERO_TEMPERATURE from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.secret_managers.main import get_secret_str @@ -84,7 +85,7 @@ class DeepInfraConfig(OpenAIGPTConfig): and value == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1" ): # this model does no support temperature == 0 - value = 0.0001 # close to 0 + value = MIN_NON_ZERO_TEMPERATURE # close to 0 if param == "tool_choice": if ( value != "auto" and value != "none" diff --git a/litellm/llms/fireworks_ai/cost_calculator.py b/litellm/llms/fireworks_ai/cost_calculator.py index f53aba4a47..31414625ab 100644 --- a/litellm/llms/fireworks_ai/cost_calculator.py +++ b/litellm/llms/fireworks_ai/cost_calculator.py @@ -4,6 +4,12 @@ For calculating cost of fireworks ai serverless inference models. 
from typing import Tuple +from litellm.constants import ( + FIREWORKS_AI_16_B, + FIREWORKS_AI_56_B_MOE, + FIREWORKS_AI_80_B, + FIREWORKS_AI_176_B_MOE, +) from litellm.types.utils import Usage from litellm.utils import get_model_info @@ -25,9 +31,9 @@ def get_base_model_for_pricing(model_name: str) -> str: moe_match = re.search(r"(\d+)x(\d+)b", model_name) if moe_match: total_billion = int(moe_match.group(1)) * int(moe_match.group(2)) - if total_billion <= 56: + if total_billion <= FIREWORKS_AI_56_B_MOE: return "fireworks-ai-moe-up-to-56b" - elif total_billion <= 176: + elif total_billion <= FIREWORKS_AI_176_B_MOE: return "fireworks-ai-56b-to-176b" # Check for standard models in the form b @@ -37,9 +43,9 @@ def get_base_model_for_pricing(model_name: str) -> str: params_billion = float(params_match) # Determine the category based on the number of parameters - if params_billion <= 16.0: + if params_billion <= FIREWORKS_AI_16_B: return "fireworks-ai-up-to-16b" - elif params_billion <= 80.0: + elif params_billion <= FIREWORKS_AI_80_B: return "fireworks-ai-16b-80b" # If no matches, return the original model_name diff --git a/litellm/llms/gemini/chat/transformation.py b/litellm/llms/gemini/chat/transformation.py index 0d5956122e..795333d598 100644 --- a/litellm/llms/gemini/chat/transformation.py +++ b/litellm/llms/gemini/chat/transformation.py @@ -81,6 +81,7 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig): "stop", "logprobs", "frequency_penalty", + "modalities", ] def map_openai_params( diff --git a/litellm/llms/huggingface/chat/handler.py b/litellm/llms/huggingface/chat/handler.py deleted file mode 100644 index 2b65e5b7da..0000000000 --- a/litellm/llms/huggingface/chat/handler.py +++ /dev/null @@ -1,769 +0,0 @@ -## Uses the huggingface text generation inference API -import json -import os -from typing import ( - Any, - Callable, - Dict, - List, - Literal, - Optional, - Tuple, - Union, - cast, - get_args, -) - -import httpx - -import litellm -from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj -from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper -from litellm.llms.custom_httpx.http_handler import ( - AsyncHTTPHandler, - HTTPHandler, - _get_httpx_client, - get_async_httpx_client, -) -from litellm.llms.huggingface.chat.transformation import ( - HuggingfaceChatConfig as HuggingfaceConfig, -) -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import EmbeddingResponse -from litellm.types.utils import Logprobs as TextCompletionLogprobs -from litellm.types.utils import ModelResponse - -from ...base import BaseLLM -from ..common_utils import HuggingfaceError - -hf_chat_config = HuggingfaceConfig() - - -hf_tasks_embeddings = Literal[ # pipeline tags + hf tei endpoints - https://huggingface.github.io/text-embeddings-inference/#/ - "sentence-similarity", "feature-extraction", "rerank", "embed", "similarity" -] - - -def get_hf_task_embedding_for_model( - model: str, task_type: Optional[str], api_base: str -) -> Optional[str]: - if task_type is not None: - if task_type in get_args(hf_tasks_embeddings): - return task_type - else: - raise Exception( - "Invalid task_type={}. 
Expected one of={}".format( - task_type, hf_tasks_embeddings - ) - ) - http_client = HTTPHandler(concurrent_limit=1) - - model_info = http_client.get(url=api_base) - - model_info_dict = model_info.json() - - pipeline_tag: Optional[str] = model_info_dict.get("pipeline_tag", None) - - return pipeline_tag - - -async def async_get_hf_task_embedding_for_model( - model: str, task_type: Optional[str], api_base: str -) -> Optional[str]: - if task_type is not None: - if task_type in get_args(hf_tasks_embeddings): - return task_type - else: - raise Exception( - "Invalid task_type={}. Expected one of={}".format( - task_type, hf_tasks_embeddings - ) - ) - http_client = get_async_httpx_client( - llm_provider=litellm.LlmProviders.HUGGINGFACE, - ) - - model_info = await http_client.get(url=api_base) - - model_info_dict = model_info.json() - - pipeline_tag: Optional[str] = model_info_dict.get("pipeline_tag", None) - - return pipeline_tag - - -async def make_call( - client: Optional[AsyncHTTPHandler], - api_base: str, - headers: dict, - data: str, - model: str, - messages: list, - logging_obj, - timeout: Optional[Union[float, httpx.Timeout]], - json_mode: bool, -) -> Tuple[Any, httpx.Headers]: - if client is None: - client = litellm.module_level_aclient - - try: - response = await client.post( - api_base, headers=headers, data=data, stream=True, timeout=timeout - ) - except httpx.HTTPStatusError as e: - error_headers = getattr(e, "headers", None) - error_response = getattr(e, "response", None) - if error_headers is None and error_response: - error_headers = getattr(error_response, "headers", None) - raise HuggingfaceError( - status_code=e.response.status_code, - message=str(await e.response.aread()), - headers=cast(dict, error_headers) if error_headers else None, - ) - except Exception as e: - for exception in litellm.LITELLM_EXCEPTION_TYPES: - if isinstance(e, exception): - raise e - raise HuggingfaceError(status_code=500, message=str(e)) - - # LOGGING - logging_obj.post_call( - input=messages, - api_key="", - original_response=response, # Pass the completion stream for logging - additional_args={"complete_input_dict": data}, - ) - - return response.aiter_lines(), response.headers - - -class Huggingface(BaseLLM): - _client_session: Optional[httpx.Client] = None - _aclient_session: Optional[httpx.AsyncClient] = None - - def __init__(self) -> None: - super().__init__() - - def completion( # noqa: PLR0915 - self, - model: str, - messages: list, - api_base: Optional[str], - model_response: ModelResponse, - print_verbose: Callable, - timeout: float, - encoding, - api_key, - logging_obj, - optional_params: dict, - litellm_params: dict, - custom_prompt_dict={}, - acompletion: bool = False, - logger_fn=None, - client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, - headers: dict = {}, - ): - super().completion() - exception_mapping_worked = False - try: - task, model = hf_chat_config.get_hf_task_for_model(model) - litellm_params["task"] = task - headers = hf_chat_config.validate_environment( - api_key=api_key, - headers=headers, - model=model, - messages=messages, - optional_params=optional_params, - ) - completion_url = hf_chat_config.get_api_base(api_base=api_base, model=model) - data = hf_chat_config.transform_request( - model=model, - messages=messages, - optional_params=optional_params, - litellm_params=litellm_params, - headers=headers, - ) - - ## LOGGING - logging_obj.pre_call( - input=data, - api_key=api_key, - additional_args={ - "complete_input_dict": data, - "headers": headers, - "api_base": 
completion_url, - "acompletion": acompletion, - }, - ) - ## COMPLETION CALL - - if acompletion is True: - ### ASYNC STREAMING - if optional_params.get("stream", False): - return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model, timeout=timeout, messages=messages) # type: ignore - else: - ### ASYNC COMPLETION - return self.acompletion( - api_base=completion_url, - data=data, - headers=headers, - model_response=model_response, - encoding=encoding, - model=model, - optional_params=optional_params, - timeout=timeout, - litellm_params=litellm_params, - logging_obj=logging_obj, - api_key=api_key, - messages=messages, - client=( - client - if client is not None - and isinstance(client, AsyncHTTPHandler) - else None - ), - ) - if client is None or not isinstance(client, HTTPHandler): - client = _get_httpx_client() - ### SYNC STREAMING - if "stream" in optional_params and optional_params["stream"] is True: - response = client.post( - url=completion_url, - headers=headers, - data=json.dumps(data), - stream=optional_params["stream"], - ) - return response.iter_lines() - ### SYNC COMPLETION - else: - response = client.post( - url=completion_url, - headers=headers, - data=json.dumps(data), - ) - - return hf_chat_config.transform_response( - model=model, - raw_response=response, - model_response=model_response, - logging_obj=logging_obj, - api_key=api_key, - request_data=data, - messages=messages, - optional_params=optional_params, - encoding=encoding, - json_mode=None, - litellm_params=litellm_params, - ) - except httpx.HTTPStatusError as e: - raise HuggingfaceError( - status_code=e.response.status_code, - message=e.response.text, - headers=e.response.headers, - ) - except HuggingfaceError as e: - exception_mapping_worked = True - raise e - except Exception as e: - if exception_mapping_worked: - raise e - else: - import traceback - - raise HuggingfaceError(status_code=500, message=traceback.format_exc()) - - async def acompletion( - self, - api_base: str, - data: dict, - headers: dict, - model_response: ModelResponse, - encoding: Any, - model: str, - optional_params: dict, - litellm_params: dict, - timeout: float, - logging_obj: LiteLLMLoggingObj, - api_key: str, - messages: List[AllMessageValues], - client: Optional[AsyncHTTPHandler] = None, - ): - response: Optional[httpx.Response] = None - try: - if client is None: - client = get_async_httpx_client( - llm_provider=litellm.LlmProviders.HUGGINGFACE - ) - ### ASYNC COMPLETION - http_response = await client.post( - url=api_base, headers=headers, data=json.dumps(data), timeout=timeout - ) - - response = http_response - - return hf_chat_config.transform_response( - model=model, - raw_response=http_response, - model_response=model_response, - logging_obj=logging_obj, - api_key=api_key, - request_data=data, - messages=messages, - optional_params=optional_params, - encoding=encoding, - json_mode=None, - litellm_params=litellm_params, - ) - except Exception as e: - if isinstance(e, httpx.TimeoutException): - raise HuggingfaceError(status_code=500, message="Request Timeout Error") - elif isinstance(e, HuggingfaceError): - raise e - elif response is not None and hasattr(response, "text"): - raise HuggingfaceError( - status_code=500, - message=f"{str(e)}\n\nOriginal Response: {response.text}", - headers=response.headers, - ) - else: - raise HuggingfaceError(status_code=500, message=f"{str(e)}") - - async def async_streaming( - self, - logging_obj, - api_base: str, - 
data: dict, - headers: dict, - model_response: ModelResponse, - messages: List[AllMessageValues], - model: str, - timeout: float, - client: Optional[AsyncHTTPHandler] = None, - ): - completion_stream, _ = await make_call( - client=client, - api_base=api_base, - headers=headers, - data=json.dumps(data), - model=model, - messages=messages, - logging_obj=logging_obj, - timeout=timeout, - json_mode=False, - ) - streamwrapper = CustomStreamWrapper( - completion_stream=completion_stream, - model=model, - custom_llm_provider="huggingface", - logging_obj=logging_obj, - ) - return streamwrapper - - def _transform_input_on_pipeline_tag( - self, input: List, pipeline_tag: Optional[str] - ) -> dict: - if pipeline_tag is None: - return {"inputs": input} - if pipeline_tag == "sentence-similarity" or pipeline_tag == "similarity": - if len(input) < 2: - raise HuggingfaceError( - status_code=400, - message="sentence-similarity requires 2+ sentences", - ) - return {"inputs": {"source_sentence": input[0], "sentences": input[1:]}} - elif pipeline_tag == "rerank": - if len(input) < 2: - raise HuggingfaceError( - status_code=400, - message="reranker requires 2+ sentences", - ) - return {"inputs": {"query": input[0], "texts": input[1:]}} - return {"inputs": input} # default to feature-extraction pipeline tag - - async def _async_transform_input( - self, - model: str, - task_type: Optional[str], - embed_url: str, - input: List, - optional_params: dict, - ) -> dict: - hf_task = await async_get_hf_task_embedding_for_model( - model=model, task_type=task_type, api_base=embed_url - ) - - data = self._transform_input_on_pipeline_tag(input=input, pipeline_tag=hf_task) - - if len(optional_params.keys()) > 0: - data["options"] = optional_params - - return data - - def _process_optional_params(self, data: dict, optional_params: dict) -> dict: - special_options_keys = HuggingfaceConfig().get_special_options_params() - special_parameters_keys = [ - "min_length", - "max_length", - "top_k", - "top_p", - "temperature", - "repetition_penalty", - "max_time", - ] - - for k, v in optional_params.items(): - if k in special_options_keys: - data.setdefault("options", {}) - data["options"][k] = v - elif k in special_parameters_keys: - data.setdefault("parameters", {}) - data["parameters"][k] = v - else: - data[k] = v - - return data - - def _transform_input( - self, - input: List, - model: str, - call_type: Literal["sync", "async"], - optional_params: dict, - embed_url: str, - ) -> dict: - data: Dict = {} - - ## TRANSFORMATION ## - if "sentence-transformers" in model: - if len(input) == 0: - raise HuggingfaceError( - status_code=400, - message="sentence transformers requires 2+ sentences", - ) - data = {"inputs": {"source_sentence": input[0], "sentences": input[1:]}} - else: - data = {"inputs": input} - - task_type = optional_params.pop("input_type", None) - - if call_type == "sync": - hf_task = get_hf_task_embedding_for_model( - model=model, task_type=task_type, api_base=embed_url - ) - elif call_type == "async": - return self._async_transform_input( - model=model, task_type=task_type, embed_url=embed_url, input=input - ) # type: ignore - - data = self._transform_input_on_pipeline_tag( - input=input, pipeline_tag=hf_task - ) - - if len(optional_params.keys()) > 0: - data = self._process_optional_params( - data=data, optional_params=optional_params - ) - - return data - - def _process_embedding_response( - self, - embeddings: dict, - model_response: EmbeddingResponse, - model: str, - input: List, - encoding: Any, - ) -> 
EmbeddingResponse: - output_data = [] - if "similarities" in embeddings: - for idx, embedding in embeddings["similarities"]: - output_data.append( - { - "object": "embedding", - "index": idx, - "embedding": embedding, # flatten list returned from hf - } - ) - else: - for idx, embedding in enumerate(embeddings): - if isinstance(embedding, float): - output_data.append( - { - "object": "embedding", - "index": idx, - "embedding": embedding, # flatten list returned from hf - } - ) - elif isinstance(embedding, list) and isinstance(embedding[0], float): - output_data.append( - { - "object": "embedding", - "index": idx, - "embedding": embedding, # flatten list returned from hf - } - ) - else: - output_data.append( - { - "object": "embedding", - "index": idx, - "embedding": embedding[0][ - 0 - ], # flatten list returned from hf - } - ) - model_response.object = "list" - model_response.data = output_data - model_response.model = model - input_tokens = 0 - for text in input: - input_tokens += len(encoding.encode(text)) - - setattr( - model_response, - "usage", - litellm.Usage( - prompt_tokens=input_tokens, - completion_tokens=input_tokens, - total_tokens=input_tokens, - prompt_tokens_details=None, - completion_tokens_details=None, - ), - ) - return model_response - - async def aembedding( - self, - model: str, - input: list, - model_response: litellm.utils.EmbeddingResponse, - timeout: Union[float, httpx.Timeout], - logging_obj: LiteLLMLoggingObj, - optional_params: dict, - api_base: str, - api_key: Optional[str], - headers: dict, - encoding: Callable, - client: Optional[AsyncHTTPHandler] = None, - ): - ## TRANSFORMATION ## - data = self._transform_input( - input=input, - model=model, - call_type="sync", - optional_params=optional_params, - embed_url=api_base, - ) - - ## LOGGING - logging_obj.pre_call( - input=input, - api_key=api_key, - additional_args={ - "complete_input_dict": data, - "headers": headers, - "api_base": api_base, - }, - ) - ## COMPLETION CALL - if client is None: - client = get_async_httpx_client( - llm_provider=litellm.LlmProviders.HUGGINGFACE, - ) - - response = await client.post(api_base, headers=headers, data=json.dumps(data)) - - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response, - ) - - embeddings = response.json() - - if "error" in embeddings: - raise HuggingfaceError(status_code=500, message=embeddings["error"]) - - ## PROCESS RESPONSE ## - return self._process_embedding_response( - embeddings=embeddings, - model_response=model_response, - model=model, - input=input, - encoding=encoding, - ) - - def embedding( - self, - model: str, - input: list, - model_response: EmbeddingResponse, - optional_params: dict, - logging_obj: LiteLLMLoggingObj, - encoding: Callable, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - timeout: Union[float, httpx.Timeout] = httpx.Timeout(None), - aembedding: Optional[bool] = None, - client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, - headers={}, - ) -> EmbeddingResponse: - super().embedding() - headers = hf_chat_config.validate_environment( - api_key=api_key, - headers=headers, - model=model, - optional_params=optional_params, - messages=[], - ) - # print_verbose(f"{model}, {task}") - embed_url = "" - if "https" in model: - embed_url = model - elif api_base: - embed_url = api_base - elif "HF_API_BASE" in os.environ: - embed_url = os.getenv("HF_API_BASE", "") - elif "HUGGINGFACE_API_BASE" in os.environ: - embed_url = 
os.getenv("HUGGINGFACE_API_BASE", "") - else: - embed_url = f"https://api-inference.huggingface.co/models/{model}" - - ## ROUTING ## - if aembedding is True: - return self.aembedding( - input=input, - model_response=model_response, - timeout=timeout, - logging_obj=logging_obj, - headers=headers, - api_base=embed_url, # type: ignore - api_key=api_key, - client=client if isinstance(client, AsyncHTTPHandler) else None, - model=model, - optional_params=optional_params, - encoding=encoding, - ) - - ## TRANSFORMATION ## - - data = self._transform_input( - input=input, - model=model, - call_type="sync", - optional_params=optional_params, - embed_url=embed_url, - ) - - ## LOGGING - logging_obj.pre_call( - input=input, - api_key=api_key, - additional_args={ - "complete_input_dict": data, - "headers": headers, - "api_base": embed_url, - }, - ) - ## COMPLETION CALL - if client is None or not isinstance(client, HTTPHandler): - client = HTTPHandler(concurrent_limit=1) - response = client.post(embed_url, headers=headers, data=json.dumps(data)) - - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response, - ) - - embeddings = response.json() - - if "error" in embeddings: - raise HuggingfaceError(status_code=500, message=embeddings["error"]) - - ## PROCESS RESPONSE ## - return self._process_embedding_response( - embeddings=embeddings, - model_response=model_response, - model=model, - input=input, - encoding=encoding, - ) - - def _transform_logprobs( - self, hf_response: Optional[List] - ) -> Optional[TextCompletionLogprobs]: - """ - Transform Hugging Face logprobs to OpenAI.Completion() format - """ - if hf_response is None: - return None - - # Initialize an empty list for the transformed logprobs - _logprob: TextCompletionLogprobs = TextCompletionLogprobs( - text_offset=[], - token_logprobs=[], - tokens=[], - top_logprobs=[], - ) - - # For each Hugging Face response, transform the logprobs - for response in hf_response: - # Extract the relevant information from the response - response_details = response["details"] - top_tokens = response_details.get("top_tokens", {}) - - for i, token in enumerate(response_details["prefill"]): - # Extract the text of the token - token_text = token["text"] - - # Extract the logprob of the token - token_logprob = token["logprob"] - - # Add the token information to the 'token_info' list - cast(List[str], _logprob.tokens).append(token_text) - cast(List[float], _logprob.token_logprobs).append(token_logprob) - - # stub this to work with llm eval harness - top_alt_tokens = {"": -1.0, "": -2.0, "": -3.0} # noqa: F601 - cast(List[Dict[str, float]], _logprob.top_logprobs).append( - top_alt_tokens - ) - - # For each element in the 'tokens' list, extract the relevant information - for i, token in enumerate(response_details["tokens"]): - # Extract the text of the token - token_text = token["text"] - - # Extract the logprob of the token - token_logprob = token["logprob"] - - top_alt_tokens = {} - temp_top_logprobs = [] - if top_tokens != {}: - temp_top_logprobs = top_tokens[i] - - # top_alt_tokens should look like this: { "alternative_1": -1, "alternative_2": -2, "alternative_3": -3 } - for elem in temp_top_logprobs: - text = elem["text"] - logprob = elem["logprob"] - top_alt_tokens[text] = logprob - - # Add the token information to the 'token_info' list - cast(List[str], _logprob.tokens).append(token_text) - cast(List[float], _logprob.token_logprobs).append(token_logprob) - cast(List[Dict[str, 
float]], _logprob.top_logprobs).append( - top_alt_tokens - ) - - # Add the text offset of the token - # This is computed as the sum of the lengths of all previous tokens - cast(List[int], _logprob.text_offset).append( - sum(len(t["text"]) for t in response_details["tokens"][:i]) - ) - - return _logprob diff --git a/litellm/llms/huggingface/chat/transformation.py b/litellm/llms/huggingface/chat/transformation.py index 082960b2c2..c84f03ab93 100644 --- a/litellm/llms/huggingface/chat/transformation.py +++ b/litellm/llms/huggingface/chat/transformation.py @@ -1,27 +1,10 @@ -import json +import logging import os -import time -from copy import deepcopy -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, List, Optional, Union import httpx -import litellm -from litellm.litellm_core_utils.prompt_templates.common_utils import ( - convert_content_list_to_str, -) -from litellm.litellm_core_utils.prompt_templates.factory import ( - custom_prompt, - prompt_factory, -) -from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper -from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException -from litellm.secret_managers.main import get_secret_str -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import Choices, Message, ModelResponse, Usage -from litellm.utils import token_counter - -from ..common_utils import HuggingfaceError, hf_task_list, hf_tasks, output_parser +from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj @@ -30,176 +13,98 @@ if TYPE_CHECKING: else: LoggingClass = Any +from litellm.llms.base_llm.chat.transformation import BaseLLMException -tgi_models_cache = None -conv_models_cache = None +from ...openai.chat.gpt_transformation import OpenAIGPTConfig +from ..common_utils import HuggingFaceError, _fetch_inference_provider_mapping -class HuggingfaceChatConfig(BaseConfig): +logger = logging.getLogger(__name__) + +BASE_URL = "https://router.huggingface.co" + + +class HuggingFaceChatConfig(OpenAIGPTConfig): """ - Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate + Reference: https://huggingface.co/docs/huggingface_hub/guides/inference """ - hf_task: Optional[ - hf_tasks - ] = None # litellm-specific param, used to know the api spec to use when calling huggingface api - best_of: Optional[int] = None - decoder_input_details: Optional[bool] = None - details: Optional[bool] = True # enables returning logprobs + best of - max_new_tokens: Optional[int] = None - repetition_penalty: Optional[float] = None - return_full_text: Optional[ - bool - ] = False # by default don't return the input as part of the output - seed: Optional[int] = None - temperature: Optional[float] = None - top_k: Optional[int] = None - top_n_tokens: Optional[int] = None - top_p: Optional[int] = None - truncate: Optional[int] = None - typical_p: Optional[float] = None - watermark: Optional[bool] = None - - def __init__( + def validate_environment( self, - best_of: Optional[int] = None, - decoder_input_details: Optional[bool] = None, - details: Optional[bool] = None, - max_new_tokens: Optional[int] = None, - repetition_penalty: Optional[float] = None, - return_full_text: Optional[bool] = None, - seed: Optional[int] = None, - temperature: Optional[float] = None, - top_k: Optional[int] = None, - top_n_tokens: 
Optional[int] = None, - top_p: Optional[int] = None, - truncate: Optional[int] = None, - typical_p: Optional[float] = None, - watermark: Optional[bool] = None, - ) -> None: - locals_ = locals().copy() - for key, value in locals_.items(): - if key != "self" and value is not None: - setattr(self.__class__, key, value) - - @classmethod - def get_config(cls): - return super().get_config() - - def get_special_options_params(self): - return ["use_cache", "wait_for_model"] - - def get_supported_openai_params(self, model: str): - return [ - "stream", - "temperature", - "max_tokens", - "max_completion_tokens", - "top_p", - "stop", - "n", - "echo", - ] - - def map_openai_params( - self, - non_default_params: Dict, - optional_params: Dict, + headers: dict, model: str, - drop_params: bool, - ) -> Dict: - for param, value in non_default_params.items(): - # temperature, top_p, n, stream, stop, max_tokens, n, presence_penalty default to None - if param == "temperature": - if value == 0.0 or value == 0: - # hugging face exception raised when temp==0 - # Failed: Error occurred: HuggingfaceException - Input validation error: `temperature` must be strictly positive - value = 0.01 - optional_params["temperature"] = value - if param == "top_p": - optional_params["top_p"] = value - if param == "n": - optional_params["best_of"] = value - optional_params[ - "do_sample" - ] = True # Need to sample if you want best of for hf inference endpoints - if param == "stream": - optional_params["stream"] = value - if param == "stop": - optional_params["stop"] = value - if param == "max_tokens" or param == "max_completion_tokens": - # HF TGI raises the following exception when max_new_tokens==0 - # Failed: Error occurred: HuggingfaceException - Input validation error: `max_new_tokens` must be strictly positive - if value == 0: - value = 1 - optional_params["max_new_tokens"] = value - if param == "echo": - # https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation.decoder_input_details - # Return the decoder input token logprobs and ids. You must set details=True as well for it to be taken into account. 
Defaults to False - optional_params["decoder_input_details"] = True + messages: List[AllMessageValues], + optional_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + default_headers = { + "content-type": "application/json", + } + if api_key is not None: + default_headers["Authorization"] = f"Bearer {api_key}" - return optional_params + headers = {**headers, **default_headers} - def get_hf_api_key(self) -> Optional[str]: - return get_secret_str("HUGGINGFACE_API_KEY") + return headers - def read_tgi_conv_models(self): - try: - global tgi_models_cache, conv_models_cache - # Check if the cache is already populated - # so we don't keep on reading txt file if there are 1k requests - if (tgi_models_cache is not None) and (conv_models_cache is not None): - return tgi_models_cache, conv_models_cache - # If not, read the file and populate the cache - tgi_models = set() - script_directory = os.path.dirname(os.path.abspath(__file__)) - script_directory = os.path.dirname(script_directory) - # Construct the file path relative to the script's directory - file_path = os.path.join( - script_directory, - "huggingface_llms_metadata", - "hf_text_generation_models.txt", - ) + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return HuggingFaceError(status_code=status_code, message=error_message, headers=headers) - with open(file_path, "r") as file: - for line in file: - tgi_models.add(line.strip()) + def get_base_url(self, model: str, base_url: Optional[str]) -> Optional[str]: + """ + Get the API base for the Huggingface API. - # Cache the set for future use - tgi_models_cache = tgi_models + Do not add the chat/embedding/rerank extension here. Let the handler do this. + """ + if model.startswith(("http://", "https://")): + base_url = model + elif base_url is None: + base_url = os.getenv("HF_API_BASE") or os.getenv("HUGGINGFACE_API_BASE", "") + return base_url - # If not, read the file and populate the cache - file_path = os.path.join( - script_directory, - "huggingface_llms_metadata", - "hf_conversational_models.txt", - ) - conv_models = set() - with open(file_path, "r") as file: - for line in file: - conv_models.add(line.strip()) - # Cache the set for future use - conv_models_cache = conv_models - return tgi_models, conv_models - except Exception: - return set(), set() - - def get_hf_task_for_model(self, model: str) -> Tuple[hf_tasks, str]: - # read text file, cast it to set - # read the file called "huggingface_llms_metadata/hf_text_generation_models.txt" - if model.split("/")[0] in hf_task_list: - split_model = model.split("/", 1) - return split_model[0], split_model[1] # type: ignore - tgi_models, conversational_models = self.read_tgi_conv_models() - - if model in tgi_models: - return "text-generation-inference", model - elif model in conversational_models: - return "conversational", model - elif "roneneldan/TinyStories" in model: - return "text-generation", model + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ + Get the complete URL for the API call. + For provider-specific routing through huggingface + """ + # 1. 
Check if api_base is provided + if api_base is not None: + complete_url = api_base + elif os.getenv("HF_API_BASE") or os.getenv("HUGGINGFACE_API_BASE"): + complete_url = str(os.getenv("HF_API_BASE")) or str(os.getenv("HUGGINGFACE_API_BASE")) + elif model.startswith(("http://", "https://")): + complete_url = model + # 4. Default construction with provider else: - return "text-generation-inference", model # default to tgi + # Parse provider and model + first_part, remaining = model.split("/", 1) + if "/" in remaining: + provider = first_part + else: + provider = "hf-inference" + + if provider == "hf-inference": + route = f"{provider}/models/{model}/v1/chat/completions" + elif provider == "novita": + route = f"{provider}/chat/completions" + else: + route = f"{provider}/v1/chat/completions" + complete_url = f"{BASE_URL}/{route}" + + # Ensure URL doesn't end with a slash + complete_url = complete_url.rstrip("/") + return complete_url def transform_request( self, @@ -209,381 +114,28 @@ class HuggingfaceChatConfig(BaseConfig): litellm_params: dict, headers: dict, ) -> dict: - task = litellm_params.get("task", None) - ## VALIDATE API FORMAT - if task is None or not isinstance(task, str) or task not in hf_task_list: - raise Exception( - "Invalid hf task - {}. Valid formats - {}.".format(task, hf_tasks) - ) - - ## Load Config - config = litellm.HuggingfaceConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - - ### MAP INPUT PARAMS - #### HANDLE SPECIAL PARAMS - special_params = self.get_special_options_params() - special_params_dict = {} - # Create a list of keys to pop after iteration - keys_to_pop = [] - - for k, v in optional_params.items(): - if k in special_params: - special_params_dict[k] = v - keys_to_pop.append(k) - - # Pop the keys from the dictionary after iteration - for k in keys_to_pop: - optional_params.pop(k) - if task == "conversational": - inference_params = deepcopy(optional_params) - inference_params.pop("details") - inference_params.pop("return_full_text") - past_user_inputs = [] - generated_responses = [] - text = "" - for message in messages: - if message["role"] == "user": - if text != "": - past_user_inputs.append(text) - text = convert_content_list_to_str(message) - elif message["role"] == "assistant" or message["role"] == "system": - generated_responses.append(convert_content_list_to_str(message)) - data = { - "inputs": { - "text": text, - "past_user_inputs": past_user_inputs, - "generated_responses": generated_responses, - }, - "parameters": inference_params, - } - - elif task == "text-generation-inference": - # always send "details" and "return_full_text" as params - if model in litellm.custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = litellm.custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details.get("roles", None), - initial_prompt_value=model_prompt_details.get( - "initial_prompt_value", "" - ), - final_prompt_value=model_prompt_details.get( - "final_prompt_value", "" - ), - messages=messages, - ) - else: - prompt = prompt_factory(model=model, messages=messages) - data = { - "inputs": prompt, # type: ignore - "parameters": optional_params, - "stream": ( # type: ignore - True - if "stream" in optional_params - and isinstance(optional_params["stream"], bool) - and optional_params["stream"] is True # type: ignore - else False - ), - } + 
if "max_retries" in optional_params: + logger.warning("`max_retries` is not supported. It will be ignored.") + optional_params.pop("max_retries", None) + first_part, remaining = model.split("/", 1) + if "/" in remaining: + provider = first_part + model_id = remaining else: - # Non TGI and Conversational llms - # We need this branch, it removes 'details' and 'return_full_text' from params - if model in litellm.custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = litellm.custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details.get("roles", {}), - initial_prompt_value=model_prompt_details.get( - "initial_prompt_value", "" - ), - final_prompt_value=model_prompt_details.get( - "final_prompt_value", "" - ), - bos_token=model_prompt_details.get("bos_token", ""), - eos_token=model_prompt_details.get("eos_token", ""), - messages=messages, - ) - else: - prompt = prompt_factory(model=model, messages=messages) - inference_params = deepcopy(optional_params) - inference_params.pop("details") - inference_params.pop("return_full_text") - data = { - "inputs": prompt, # type: ignore - } - if task == "text-generation-inference": - data["parameters"] = inference_params - data["stream"] = ( # type: ignore - True # type: ignore - if "stream" in optional_params and optional_params["stream"] is True - else False - ) - - ### RE-ADD SPECIAL PARAMS - if len(special_params_dict.keys()) > 0: - data.update({"options": special_params_dict}) - - return data - - def get_api_base(self, api_base: Optional[str], model: str) -> str: - """ - Get the API base for the Huggingface API. - - Do not add the chat/embedding/rerank extension here. Let the handler do this. - """ - if "https" in model: - completion_url = model - elif api_base is not None: - completion_url = api_base - elif "HF_API_BASE" in os.environ: - completion_url = os.getenv("HF_API_BASE", "") - elif "HUGGINGFACE_API_BASE" in os.environ: - completion_url = os.getenv("HUGGINGFACE_API_BASE", "") - else: - completion_url = f"https://api-inference.huggingface.co/models/{model}" - - return completion_url - - def validate_environment( - self, - headers: Dict, - model: str, - messages: List[AllMessageValues], - optional_params: Dict, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - ) -> Dict: - default_headers = { - "content-type": "application/json", - } - if api_key is not None: - default_headers[ - "Authorization" - ] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens - - headers = {**headers, **default_headers} - return headers - - def get_error_class( - self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] - ) -> BaseLLMException: - return HuggingfaceError( - status_code=status_code, message=error_message, headers=headers - ) - - def _convert_streamed_response_to_complete_response( - self, - response: httpx.Response, - logging_obj: LoggingClass, - model: str, - data: dict, - api_key: Optional[str] = None, - ) -> List[Dict[str, Any]]: - streamed_response = CustomStreamWrapper( - completion_stream=response.iter_lines(), - model=model, - custom_llm_provider="huggingface", - logging_obj=logging_obj, - ) - content = "" - for chunk in streamed_response: - content += chunk["choices"][0]["delta"]["content"] - completion_response: List[Dict[str, Any]] = [{"generated_text": content}] - ## LOGGING - logging_obj.post_call( - input=data, - api_key=api_key, - original_response=completion_response, - 
additional_args={"complete_input_dict": data}, - ) - return completion_response - - def convert_to_model_response_object( # noqa: PLR0915 - self, - completion_response: Union[List[Dict[str, Any]], Dict[str, Any]], - model_response: ModelResponse, - task: Optional[hf_tasks], - optional_params: dict, - encoding: Any, - messages: List[AllMessageValues], - model: str, - ): - if task is None: - task = "text-generation-inference" # default to tgi - - if task == "conversational": - if len(completion_response["generated_text"]) > 0: # type: ignore - model_response.choices[0].message.content = completion_response[ # type: ignore - "generated_text" - ] - elif task == "text-generation-inference": - if ( - not isinstance(completion_response, list) - or not isinstance(completion_response[0], dict) - or "generated_text" not in completion_response[0] - ): - raise HuggingfaceError( - status_code=422, - message=f"response is not in expected format - {completion_response}", - headers=None, - ) - - if len(completion_response[0]["generated_text"]) > 0: - model_response.choices[0].message.content = output_parser( # type: ignore - completion_response[0]["generated_text"] - ) - ## GETTING LOGPROBS + FINISH REASON - if ( - "details" in completion_response[0] - and "tokens" in completion_response[0]["details"] - ): - model_response.choices[0].finish_reason = completion_response[0][ - "details" - ]["finish_reason"] - sum_logprob = 0 - for token in completion_response[0]["details"]["tokens"]: - if token["logprob"] is not None: - sum_logprob += token["logprob"] - setattr(model_response.choices[0].message, "_logprob", sum_logprob) # type: ignore - if "best_of" in optional_params and optional_params["best_of"] > 1: - if ( - "details" in completion_response[0] - and "best_of_sequences" in completion_response[0]["details"] - ): - choices_list = [] - for idx, item in enumerate( - completion_response[0]["details"]["best_of_sequences"] - ): - sum_logprob = 0 - for token in item["tokens"]: - if token["logprob"] is not None: - sum_logprob += token["logprob"] - if len(item["generated_text"]) > 0: - message_obj = Message( - content=output_parser(item["generated_text"]), - logprobs=sum_logprob, - ) - else: - message_obj = Message(content=None) - choice_obj = Choices( - finish_reason=item["finish_reason"], - index=idx + 1, - message=message_obj, - ) - choices_list.append(choice_obj) - model_response.choices.extend(choices_list) - elif task == "text-classification": - model_response.choices[0].message.content = json.dumps( # type: ignore - completion_response + provider = "hf-inference" + model_id = model + provider_mapping = _fetch_inference_provider_mapping(model_id) + if provider not in provider_mapping: + raise HuggingFaceError( + message=f"Model {model_id} is not supported for provider {provider}", + status_code=404, + headers={}, ) - else: - if ( - isinstance(completion_response, list) - and len(completion_response[0]["generated_text"]) > 0 - ): - model_response.choices[0].message.content = output_parser( # type: ignore - completion_response[0]["generated_text"] - ) - ## CALCULATING USAGE - prompt_tokens = 0 - try: - prompt_tokens = token_counter(model=model, messages=messages) - except Exception: - # this should remain non blocking we should not block a response returning if calculating usage fails - pass - output_text = model_response["choices"][0]["message"].get("content", "") - if output_text is not None and len(output_text) > 0: - completion_tokens = 0 - try: - completion_tokens = len( - encoding.encode( - 
model_response["choices"][0]["message"].get("content", "") - ) - ) ##[TODO] use the llama2 tokenizer here - except Exception: - # this should remain non blocking we should not block a response returning if calculating usage fails - pass - else: - completion_tokens = 0 - - model_response.created = int(time.time()) - model_response.model = model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, - ) - setattr(model_response, "usage", usage) - model_response._hidden_params["original_response"] = completion_response - return model_response - - def transform_response( - self, - model: str, - raw_response: httpx.Response, - model_response: ModelResponse, - logging_obj: LoggingClass, - request_data: Dict, - messages: List[AllMessageValues], - optional_params: Dict, - litellm_params: Dict, - encoding: Any, - api_key: Optional[str] = None, - json_mode: Optional[bool] = None, - ) -> ModelResponse: - ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten) - task = litellm_params.get("task", None) - is_streamed = False - if ( - raw_response.__dict__["headers"].get("Content-Type", "") - == "text/event-stream" - ): - is_streamed = True - - # iterate over the complete streamed response, and return the final answer - if is_streamed: - completion_response = self._convert_streamed_response_to_complete_response( - response=raw_response, - logging_obj=logging_obj, - model=model, - data=request_data, - api_key=api_key, + provider_mapping = provider_mapping[provider] + if provider_mapping["status"] == "staging": + logger.warning( + f"Model {model_id} is in staging mode for provider {provider}. Meant for test purposes only." ) - else: - ## LOGGING - logging_obj.post_call( - input=request_data, - api_key=api_key, - original_response=raw_response.text, - additional_args={"complete_input_dict": request_data}, - ) - ## RESPONSE OBJECT - try: - completion_response = raw_response.json() - if isinstance(completion_response, dict): - completion_response = [completion_response] - except Exception: - raise HuggingfaceError( - message=f"Original Response received: {raw_response.text}", - status_code=raw_response.status_code, - ) - - if isinstance(completion_response, dict) and "error" in completion_response: - raise HuggingfaceError( - message=completion_response["error"], # type: ignore - status_code=raw_response.status_code, - ) - return self.convert_to_model_response_object( - completion_response=completion_response, - model_response=model_response, - task=task if task is not None and task in hf_task_list else None, - optional_params=optional_params, - encoding=encoding, - messages=messages, - model=model, - ) + mapped_model = provider_mapping["providerId"] + messages = self._transform_messages(messages=messages, model=mapped_model) + return dict(ChatCompletionRequest(model=mapped_model, messages=messages, **optional_params)) diff --git a/litellm/llms/huggingface/common_utils.py b/litellm/llms/huggingface/common_utils.py index d793b29874..9ab4367c9b 100644 --- a/litellm/llms/huggingface/common_utils.py +++ b/litellm/llms/huggingface/common_utils.py @@ -1,18 +1,30 @@ +import os +from functools import lru_cache from typing import Literal, Optional, Union import httpx from litellm.llms.base_llm.chat.transformation import BaseLLMException +HF_HUB_URL = "https://huggingface.co" -class HuggingfaceError(BaseLLMException): + +class HuggingFaceError(BaseLLMException): def __init__( self, - 
status_code: int, - message: str, - headers: Optional[Union[dict, httpx.Headers]] = None, + status_code, + message, + request: Optional[httpx.Request] = None, + response: Optional[httpx.Response] = None, + headers: Optional[Union[httpx.Headers, dict]] = None, ): - super().__init__(status_code=status_code, message=message, headers=headers) + super().__init__( + status_code=status_code, + message=message, + request=request, + response=response, + headers=headers, + ) hf_tasks = Literal[ @@ -43,3 +55,48 @@ def output_parser(generated_text: str): if generated_text.endswith(token): generated_text = generated_text[::-1].replace(token[::-1], "", 1)[::-1] return generated_text + + +@lru_cache(maxsize=128) +def _fetch_inference_provider_mapping(model: str) -> dict: + """ + Fetch provider mappings for a model from the Hugging Face Hub. + + Args: + model: The model identifier (e.g., 'meta-llama/Llama-2-7b') + + Returns: + dict: The inference provider mapping for the model + + Raises: + ValueError: If no provider mapping is found + HuggingFaceError: If the API request fails + """ + headers = {"Accept": "application/json"} + if os.getenv("HUGGINGFACE_API_KEY"): + headers["Authorization"] = f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}" + + path = f"{HF_HUB_URL}/api/models/{model}" + params = {"expand": ["inferenceProviderMapping"]} + + try: + response = httpx.get(path, headers=headers, params=params) + response.raise_for_status() + provider_mapping = response.json().get("inferenceProviderMapping") + + if provider_mapping is None: + raise ValueError(f"No provider mapping found for model {model}") + + return provider_mapping + except httpx.HTTPError as e: + if hasattr(e, "response"): + status_code = getattr(e.response, "status_code", 500) + headers = getattr(e.response, "headers", {}) + else: + status_code = 500 + headers = {} + raise HuggingFaceError( + message=f"Failed to fetch provider mapping: {str(e)}", + status_code=status_code, + headers=headers, + ) diff --git a/litellm/llms/huggingface/embedding/handler.py b/litellm/llms/huggingface/embedding/handler.py new file mode 100644 index 0000000000..7277fbd0e3 --- /dev/null +++ b/litellm/llms/huggingface/embedding/handler.py @@ -0,0 +1,421 @@ +import json +import os +from typing import ( + Any, + Callable, + Dict, + List, + Literal, + Optional, + Union, + get_args, +) + +import httpx + +import litellm +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + HTTPHandler, + get_async_httpx_client, +) +from litellm.types.utils import EmbeddingResponse + +from ...base import BaseLLM +from ..common_utils import HuggingFaceError +from .transformation import HuggingFaceEmbeddingConfig + +config = HuggingFaceEmbeddingConfig() + +HF_HUB_URL = "https://huggingface.co" + +hf_tasks_embeddings = Literal[ # pipeline tags + hf tei endpoints - https://huggingface.github.io/text-embeddings-inference/#/ + "sentence-similarity", "feature-extraction", "rerank", "embed", "similarity" +] + + + +def get_hf_task_embedding_for_model(model: str, task_type: Optional[str], api_base: str) -> Optional[str]: + if task_type is not None: + if task_type in get_args(hf_tasks_embeddings): + return task_type + else: + raise Exception( + "Invalid task_type={}. 
Expected one of={}".format( + task_type, hf_tasks_embeddings + ) + ) + http_client = HTTPHandler(concurrent_limit=1) + + model_info = http_client.get(url=f"{api_base}/api/models/{model}") + + model_info_dict = model_info.json() + + pipeline_tag: Optional[str] = model_info_dict.get("pipeline_tag", None) + + return pipeline_tag + + +async def async_get_hf_task_embedding_for_model(model: str, task_type: Optional[str], api_base: str) -> Optional[str]: + if task_type is not None: + if task_type in get_args(hf_tasks_embeddings): + return task_type + else: + raise Exception( + "Invalid task_type={}. Expected one of={}".format( + task_type, hf_tasks_embeddings + ) + ) + http_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders.HUGGINGFACE, + ) + + model_info = await http_client.get(url=f"{api_base}/api/models/{model}") + + model_info_dict = model_info.json() + + pipeline_tag: Optional[str] = model_info_dict.get("pipeline_tag", None) + + return pipeline_tag + + +class HuggingFaceEmbedding(BaseLLM): + _client_session: Optional[httpx.Client] = None + _aclient_session: Optional[httpx.AsyncClient] = None + + def __init__(self) -> None: + super().__init__() + + def _transform_input_on_pipeline_tag( + self, input: List, pipeline_tag: Optional[str] + ) -> dict: + if pipeline_tag is None: + return {"inputs": input} + if pipeline_tag == "sentence-similarity" or pipeline_tag == "similarity": + if len(input) < 2: + raise HuggingFaceError( + status_code=400, + message="sentence-similarity requires 2+ sentences", + ) + return {"inputs": {"source_sentence": input[0], "sentences": input[1:]}} + elif pipeline_tag == "rerank": + if len(input) < 2: + raise HuggingFaceError( + status_code=400, + message="reranker requires 2+ sentences", + ) + return {"inputs": {"query": input[0], "texts": input[1:]}} + return {"inputs": input} # default to feature-extraction pipeline tag + + async def _async_transform_input( + self, + model: str, + task_type: Optional[str], + embed_url: str, + input: List, + optional_params: dict, + ) -> dict: + hf_task = await async_get_hf_task_embedding_for_model(model=model, task_type=task_type, api_base=HF_HUB_URL) + + data = self._transform_input_on_pipeline_tag(input=input, pipeline_tag=hf_task) + + if len(optional_params.keys()) > 0: + data["options"] = optional_params + + return data + + def _process_optional_params(self, data: dict, optional_params: dict) -> dict: + special_options_keys = config.get_special_options_params() + special_parameters_keys = [ + "min_length", + "max_length", + "top_k", + "top_p", + "temperature", + "repetition_penalty", + "max_time", + ] + + for k, v in optional_params.items(): + if k in special_options_keys: + data.setdefault("options", {}) + data["options"][k] = v + elif k in special_parameters_keys: + data.setdefault("parameters", {}) + data["parameters"][k] = v + else: + data[k] = v + + return data + + def _transform_input( + self, + input: List, + model: str, + call_type: Literal["sync", "async"], + optional_params: dict, + embed_url: str, + ) -> dict: + data: Dict = {} + + ## TRANSFORMATION ## + if "sentence-transformers" in model: + if len(input) == 0: + raise HuggingFaceError( + status_code=400, + message="sentence transformers requires 2+ sentences", + ) + data = {"inputs": {"source_sentence": input[0], "sentences": input[1:]}} + else: + data = {"inputs": input} + + task_type = optional_params.pop("input_type", None) + + if call_type == "sync": + hf_task = get_hf_task_embedding_for_model(model=model, task_type=task_type, 
api_base=HF_HUB_URL) + elif call_type == "async": + return self._async_transform_input( + model=model, task_type=task_type, embed_url=embed_url, input=input + ) # type: ignore + + data = self._transform_input_on_pipeline_tag( + input=input, pipeline_tag=hf_task + ) + + if len(optional_params.keys()) > 0: + data = self._process_optional_params( + data=data, optional_params=optional_params + ) + + return data + + def _process_embedding_response( + self, + embeddings: dict, + model_response: EmbeddingResponse, + model: str, + input: List, + encoding: Any, + ) -> EmbeddingResponse: + output_data = [] + if "similarities" in embeddings: + for idx, embedding in embeddings["similarities"]: + output_data.append( + { + "object": "embedding", + "index": idx, + "embedding": embedding, # flatten list returned from hf + } + ) + else: + for idx, embedding in enumerate(embeddings): + if isinstance(embedding, float): + output_data.append( + { + "object": "embedding", + "index": idx, + "embedding": embedding, # flatten list returned from hf + } + ) + elif isinstance(embedding, list) and isinstance(embedding[0], float): + output_data.append( + { + "object": "embedding", + "index": idx, + "embedding": embedding, # flatten list returned from hf + } + ) + else: + output_data.append( + { + "object": "embedding", + "index": idx, + "embedding": embedding[0][ + 0 + ], # flatten list returned from hf + } + ) + model_response.object = "list" + model_response.data = output_data + model_response.model = model + input_tokens = 0 + for text in input: + input_tokens += len(encoding.encode(text)) + + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=input_tokens, + completion_tokens=input_tokens, + total_tokens=input_tokens, + prompt_tokens_details=None, + completion_tokens_details=None, + ), + ) + return model_response + + async def aembedding( + self, + model: str, + input: list, + model_response: litellm.utils.EmbeddingResponse, + timeout: Union[float, httpx.Timeout], + logging_obj: LiteLLMLoggingObj, + optional_params: dict, + api_base: str, + api_key: Optional[str], + headers: dict, + encoding: Callable, + client: Optional[AsyncHTTPHandler] = None, + ): + ## TRANSFORMATION ## + data = self._transform_input( + input=input, + model=model, + call_type="sync", + optional_params=optional_params, + embed_url=api_base, + ) + + ## LOGGING + logging_obj.pre_call( + input=input, + api_key=api_key, + additional_args={ + "complete_input_dict": data, + "headers": headers, + "api_base": api_base, + }, + ) + ## COMPLETION CALL + if client is None: + client = get_async_httpx_client( + llm_provider=litellm.LlmProviders.HUGGINGFACE, + ) + + response = await client.post(api_base, headers=headers, data=json.dumps(data)) + + ## LOGGING + logging_obj.post_call( + input=input, + api_key=api_key, + additional_args={"complete_input_dict": data}, + original_response=response, + ) + + embeddings = response.json() + + if "error" in embeddings: + raise HuggingFaceError(status_code=500, message=embeddings["error"]) + + ## PROCESS RESPONSE ## + return self._process_embedding_response( + embeddings=embeddings, + model_response=model_response, + model=model, + input=input, + encoding=encoding, + ) + + def embedding( + self, + model: str, + input: list, + model_response: EmbeddingResponse, + optional_params: dict, + logging_obj: LiteLLMLoggingObj, + encoding: Callable, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + timeout: Union[float, httpx.Timeout] = httpx.Timeout(None), + aembedding: Optional[bool] = 
None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + headers={}, + ) -> EmbeddingResponse: + super().embedding() + headers = config.validate_environment( + api_key=api_key, + headers=headers, + model=model, + optional_params=optional_params, + messages=[], + ) + task_type = optional_params.pop("input_type", None) + task = get_hf_task_embedding_for_model(model=model, task_type=task_type, api_base=HF_HUB_URL) + # print_verbose(f"{model}, {task}") + embed_url = "" + if "https" in model: + embed_url = model + elif api_base: + embed_url = api_base + elif "HF_API_BASE" in os.environ: + embed_url = os.getenv("HF_API_BASE", "") + elif "HUGGINGFACE_API_BASE" in os.environ: + embed_url = os.getenv("HUGGINGFACE_API_BASE", "") + else: + embed_url = f"https://router.huggingface.co/hf-inference/pipeline/{task}/{model}" + + ## ROUTING ## + if aembedding is True: + return self.aembedding( + input=input, + model_response=model_response, + timeout=timeout, + logging_obj=logging_obj, + headers=headers, + api_base=embed_url, # type: ignore + api_key=api_key, + client=client if isinstance(client, AsyncHTTPHandler) else None, + model=model, + optional_params=optional_params, + encoding=encoding, + ) + + ## TRANSFORMATION ## + + data = self._transform_input( + input=input, + model=model, + call_type="sync", + optional_params=optional_params, + embed_url=embed_url, + ) + + ## LOGGING + logging_obj.pre_call( + input=input, + api_key=api_key, + additional_args={ + "complete_input_dict": data, + "headers": headers, + "api_base": embed_url, + }, + ) + ## COMPLETION CALL + if client is None or not isinstance(client, HTTPHandler): + client = HTTPHandler(concurrent_limit=1) + response = client.post(embed_url, headers=headers, data=json.dumps(data)) + + ## LOGGING + logging_obj.post_call( + input=input, + api_key=api_key, + additional_args={"complete_input_dict": data}, + original_response=response, + ) + + embeddings = response.json() + + if "error" in embeddings: + raise HuggingFaceError(status_code=500, message=embeddings["error"]) + + ## PROCESS RESPONSE ## + return self._process_embedding_response( + embeddings=embeddings, + model_response=model_response, + model=model, + input=input, + encoding=encoding, + ) diff --git a/litellm/llms/huggingface/embedding/transformation.py b/litellm/llms/huggingface/embedding/transformation.py new file mode 100644 index 0000000000..f803157768 --- /dev/null +++ b/litellm/llms/huggingface/embedding/transformation.py @@ -0,0 +1,589 @@ +import json +import os +import time +from copy import deepcopy +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union + +import httpx + +import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + convert_content_list_to_str, +) +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper +from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException +from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import Choices, Message, ModelResponse, Usage +from litellm.utils import token_counter + +from ..common_utils import HuggingFaceError, hf_task_list, hf_tasks, output_parser + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj + + LoggingClass = LiteLLMLoggingObj +else: + LoggingClass = Any + + 
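For orientation, a minimal usage sketch of the embedding path wired up by the new handler above. The model name and token value are illustrative assumptions, not part of this patch; the handler resolves the model's pipeline tag from the Hub and posts to the hf-inference pipeline route built in embedding().

import os
import litellm

# Illustrative setup: this module reads HUGGINGFACE_API_KEY in several places,
# so a placeholder token is exported under that name.
os.environ["HUGGINGFACE_API_KEY"] = "hf_xxxxxx"

# The "huggingface/" prefix routes the call to the Hugging Face embedding handler;
# BAAI/bge-small-en-v1.5 is just an example feature-extraction model.
response = litellm.embedding(
    model="huggingface/BAAI/bge-small-en-v1.5",
    input=["LiteLLM supports Hugging Face embedding endpoints"],
)
print(response.data[0]["embedding"][:5])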
+tgi_models_cache = None +conv_models_cache = None + + +class HuggingFaceEmbeddingConfig(BaseConfig): + """ + Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate + """ + + hf_task: Optional[ + hf_tasks + ] = None # litellm-specific param, used to know the api spec to use when calling huggingface api + best_of: Optional[int] = None + decoder_input_details: Optional[bool] = None + details: Optional[bool] = True # enables returning logprobs + best of + max_new_tokens: Optional[int] = None + repetition_penalty: Optional[float] = None + return_full_text: Optional[ + bool + ] = False # by default don't return the input as part of the output + seed: Optional[int] = None + temperature: Optional[float] = None + top_k: Optional[int] = None + top_n_tokens: Optional[int] = None + top_p: Optional[int] = None + truncate: Optional[int] = None + typical_p: Optional[float] = None + watermark: Optional[bool] = None + + def __init__( + self, + best_of: Optional[int] = None, + decoder_input_details: Optional[bool] = None, + details: Optional[bool] = None, + max_new_tokens: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: Optional[bool] = None, + seed: Optional[int] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_n_tokens: Optional[int] = None, + top_p: Optional[int] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: Optional[bool] = None, + ) -> None: + locals_ = locals().copy() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return super().get_config() + + def get_special_options_params(self): + return ["use_cache", "wait_for_model"] + + def get_supported_openai_params(self, model: str): + return [ + "stream", + "temperature", + "max_tokens", + "max_completion_tokens", + "top_p", + "stop", + "n", + "echo", + ] + + def map_openai_params( + self, + non_default_params: Dict, + optional_params: Dict, + model: str, + drop_params: bool, + ) -> Dict: + for param, value in non_default_params.items(): + # temperature, top_p, n, stream, stop, max_tokens, n, presence_penalty default to None + if param == "temperature": + if value == 0.0 or value == 0: + # hugging face exception raised when temp==0 + # Failed: Error occurred: HuggingfaceException - Input validation error: `temperature` must be strictly positive + value = 0.01 + optional_params["temperature"] = value + if param == "top_p": + optional_params["top_p"] = value + if param == "n": + optional_params["best_of"] = value + optional_params[ + "do_sample" + ] = True # Need to sample if you want best of for hf inference endpoints + if param == "stream": + optional_params["stream"] = value + if param == "stop": + optional_params["stop"] = value + if param == "max_tokens" or param == "max_completion_tokens": + # HF TGI raises the following exception when max_new_tokens==0 + # Failed: Error occurred: HuggingfaceException - Input validation error: `max_new_tokens` must be strictly positive + if value == 0: + value = 1 + optional_params["max_new_tokens"] = value + if param == "echo": + # https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation.decoder_input_details + # Return the decoder input token logprobs and ids. You must set details=True as well for it to be taken into account. 
Defaults to False + optional_params["decoder_input_details"] = True + + return optional_params + + def get_hf_api_key(self) -> Optional[str]: + return get_secret_str("HUGGINGFACE_API_KEY") + + def read_tgi_conv_models(self): + try: + global tgi_models_cache, conv_models_cache + # Check if the cache is already populated + # so we don't keep on reading txt file if there are 1k requests + if (tgi_models_cache is not None) and (conv_models_cache is not None): + return tgi_models_cache, conv_models_cache + # If not, read the file and populate the cache + tgi_models = set() + script_directory = os.path.dirname(os.path.abspath(__file__)) + script_directory = os.path.dirname(script_directory) + # Construct the file path relative to the script's directory + file_path = os.path.join( + script_directory, + "huggingface_llms_metadata", + "hf_text_generation_models.txt", + ) + + with open(file_path, "r") as file: + for line in file: + tgi_models.add(line.strip()) + + # Cache the set for future use + tgi_models_cache = tgi_models + + # If not, read the file and populate the cache + file_path = os.path.join( + script_directory, + "huggingface_llms_metadata", + "hf_conversational_models.txt", + ) + conv_models = set() + with open(file_path, "r") as file: + for line in file: + conv_models.add(line.strip()) + # Cache the set for future use + conv_models_cache = conv_models + return tgi_models, conv_models + except Exception: + return set(), set() + + def get_hf_task_for_model(self, model: str) -> Tuple[hf_tasks, str]: + # read text file, cast it to set + # read the file called "huggingface_llms_metadata/hf_text_generation_models.txt" + if model.split("/")[0] in hf_task_list: + split_model = model.split("/", 1) + return split_model[0], split_model[1] # type: ignore + tgi_models, conversational_models = self.read_tgi_conv_models() + + if model in tgi_models: + return "text-generation-inference", model + elif model in conversational_models: + return "conversational", model + elif "roneneldan/TinyStories" in model: + return "text-generation", model + else: + return "text-generation-inference", model # default to tgi + + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + task = litellm_params.get("task", None) + ## VALIDATE API FORMAT + if task is None or not isinstance(task, str) or task not in hf_task_list: + raise Exception( + "Invalid hf task - {}. 
Valid formats - {}.".format(task, hf_tasks) + ) + + ## Load Config + config = litellm.HuggingFaceEmbeddingConfig.get_config() + for k, v in config.items(): + if ( + k not in optional_params + ): # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in + optional_params[k] = v + + ### MAP INPUT PARAMS + #### HANDLE SPECIAL PARAMS + special_params = self.get_special_options_params() + special_params_dict = {} + # Create a list of keys to pop after iteration + keys_to_pop = [] + + for k, v in optional_params.items(): + if k in special_params: + special_params_dict[k] = v + keys_to_pop.append(k) + + # Pop the keys from the dictionary after iteration + for k in keys_to_pop: + optional_params.pop(k) + if task == "conversational": + inference_params = deepcopy(optional_params) + inference_params.pop("details") + inference_params.pop("return_full_text") + past_user_inputs = [] + generated_responses = [] + text = "" + for message in messages: + if message["role"] == "user": + if text != "": + past_user_inputs.append(text) + text = convert_content_list_to_str(message) + elif message["role"] == "assistant" or message["role"] == "system": + generated_responses.append(convert_content_list_to_str(message)) + data = { + "inputs": { + "text": text, + "past_user_inputs": past_user_inputs, + "generated_responses": generated_responses, + }, + "parameters": inference_params, + } + + elif task == "text-generation-inference": + # always send "details" and "return_full_text" as params + if model in litellm.custom_prompt_dict: + # check if the model has a registered custom prompt + model_prompt_details = litellm.custom_prompt_dict[model] + prompt = custom_prompt( + role_dict=model_prompt_details.get("roles", None), + initial_prompt_value=model_prompt_details.get( + "initial_prompt_value", "" + ), + final_prompt_value=model_prompt_details.get( + "final_prompt_value", "" + ), + messages=messages, + ) + else: + prompt = prompt_factory(model=model, messages=messages) + data = { + "inputs": prompt, # type: ignore + "parameters": optional_params, + "stream": ( # type: ignore + True + if "stream" in optional_params + and isinstance(optional_params["stream"], bool) + and optional_params["stream"] is True # type: ignore + else False + ), + } + else: + # Non TGI and Conversational llms + # We need this branch, it removes 'details' and 'return_full_text' from params + if model in litellm.custom_prompt_dict: + # check if the model has a registered custom prompt + model_prompt_details = litellm.custom_prompt_dict[model] + prompt = custom_prompt( + role_dict=model_prompt_details.get("roles", {}), + initial_prompt_value=model_prompt_details.get( + "initial_prompt_value", "" + ), + final_prompt_value=model_prompt_details.get( + "final_prompt_value", "" + ), + bos_token=model_prompt_details.get("bos_token", ""), + eos_token=model_prompt_details.get("eos_token", ""), + messages=messages, + ) + else: + prompt = prompt_factory(model=model, messages=messages) + inference_params = deepcopy(optional_params) + inference_params.pop("details") + inference_params.pop("return_full_text") + data = { + "inputs": prompt, # type: ignore + } + if task == "text-generation-inference": + data["parameters"] = inference_params + data["stream"] = ( # type: ignore + True # type: ignore + if "stream" in optional_params and optional_params["stream"] is True + else False + ) + + ### RE-ADD SPECIAL PARAMS + if len(special_params_dict.keys()) > 0: + data.update({"options": special_params_dict}) + + return data + + 
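To make the branches above concrete, the legacy transform produces request bodies of roughly the following shapes; the placeholder values are illustrative and the exact parameters depend on what the caller passed.

# task == "text-generation-inference": prompt string plus TGI parameters
tgi_payload = {
    "inputs": "<prompt built by prompt_factory or custom_prompt>",
    "parameters": {"max_new_tokens": 256, "temperature": 0.7},  # illustrative values
    "stream": False,
}

# task == "conversational": details/return_full_text are stripped before sending
conversational_payload = {
    "inputs": {
        "text": "<latest user message>",
        "past_user_inputs": ["<earlier user messages>"],
        "generated_responses": ["<earlier assistant or system messages>"],
    },
    "parameters": {"temperature": 0.7},  # illustrative values
}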
def get_api_base(self, api_base: Optional[str], model: str) -> str: + """ + Get the API base for the Huggingface API. + + Do not add the chat/embedding/rerank extension here. Let the handler do this. + """ + if "https" in model: + completion_url = model + elif api_base is not None: + completion_url = api_base + elif "HF_API_BASE" in os.environ: + completion_url = os.getenv("HF_API_BASE", "") + elif "HUGGINGFACE_API_BASE" in os.environ: + completion_url = os.getenv("HUGGINGFACE_API_BASE", "") + else: + completion_url = f"https://api-inference.huggingface.co/models/{model}" + + return completion_url + + def validate_environment( + self, + headers: Dict, + model: str, + messages: List[AllMessageValues], + optional_params: Dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> Dict: + default_headers = { + "content-type": "application/json", + } + if api_key is not None: + default_headers[ + "Authorization" + ] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens + + headers = {**headers, **default_headers} + return headers + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return HuggingFaceError( + status_code=status_code, message=error_message, headers=headers + ) + + def _convert_streamed_response_to_complete_response( + self, + response: httpx.Response, + logging_obj: LoggingClass, + model: str, + data: dict, + api_key: Optional[str] = None, + ) -> List[Dict[str, Any]]: + streamed_response = CustomStreamWrapper( + completion_stream=response.iter_lines(), + model=model, + custom_llm_provider="huggingface", + logging_obj=logging_obj, + ) + content = "" + for chunk in streamed_response: + content += chunk["choices"][0]["delta"]["content"] + completion_response: List[Dict[str, Any]] = [{"generated_text": content}] + ## LOGGING + logging_obj.post_call( + input=data, + api_key=api_key, + original_response=completion_response, + additional_args={"complete_input_dict": data}, + ) + return completion_response + + def convert_to_model_response_object( # noqa: PLR0915 + self, + completion_response: Union[List[Dict[str, Any]], Dict[str, Any]], + model_response: ModelResponse, + task: Optional[hf_tasks], + optional_params: dict, + encoding: Any, + messages: List[AllMessageValues], + model: str, + ): + if task is None: + task = "text-generation-inference" # default to tgi + + if task == "conversational": + if len(completion_response["generated_text"]) > 0: # type: ignore + model_response.choices[0].message.content = completion_response[ # type: ignore + "generated_text" + ] + elif task == "text-generation-inference": + if ( + not isinstance(completion_response, list) + or not isinstance(completion_response[0], dict) + or "generated_text" not in completion_response[0] + ): + raise HuggingFaceError( + status_code=422, + message=f"response is not in expected format - {completion_response}", + headers=None, + ) + + if len(completion_response[0]["generated_text"]) > 0: + model_response.choices[0].message.content = output_parser( # type: ignore + completion_response[0]["generated_text"] + ) + ## GETTING LOGPROBS + FINISH REASON + if ( + "details" in completion_response[0] + and "tokens" in completion_response[0]["details"] + ): + model_response.choices[0].finish_reason = completion_response[0][ + "details" + ]["finish_reason"] + sum_logprob = 0 + for token in completion_response[0]["details"]["tokens"]: + if token["logprob"] is not None: + sum_logprob += 
token["logprob"] + setattr(model_response.choices[0].message, "_logprob", sum_logprob) # type: ignore + if "best_of" in optional_params and optional_params["best_of"] > 1: + if ( + "details" in completion_response[0] + and "best_of_sequences" in completion_response[0]["details"] + ): + choices_list = [] + for idx, item in enumerate( + completion_response[0]["details"]["best_of_sequences"] + ): + sum_logprob = 0 + for token in item["tokens"]: + if token["logprob"] is not None: + sum_logprob += token["logprob"] + if len(item["generated_text"]) > 0: + message_obj = Message( + content=output_parser(item["generated_text"]), + logprobs=sum_logprob, + ) + else: + message_obj = Message(content=None) + choice_obj = Choices( + finish_reason=item["finish_reason"], + index=idx + 1, + message=message_obj, + ) + choices_list.append(choice_obj) + model_response.choices.extend(choices_list) + elif task == "text-classification": + model_response.choices[0].message.content = json.dumps( # type: ignore + completion_response + ) + else: + if ( + isinstance(completion_response, list) + and len(completion_response[0]["generated_text"]) > 0 + ): + model_response.choices[0].message.content = output_parser( # type: ignore + completion_response[0]["generated_text"] + ) + ## CALCULATING USAGE + prompt_tokens = 0 + try: + prompt_tokens = token_counter(model=model, messages=messages) + except Exception: + # this should remain non blocking we should not block a response returning if calculating usage fails + pass + output_text = model_response["choices"][0]["message"].get("content", "") + if output_text is not None and len(output_text) > 0: + completion_tokens = 0 + try: + completion_tokens = len( + encoding.encode( + model_response["choices"][0]["message"].get("content", "") + ) + ) ##[TODO] use the llama2 tokenizer here + except Exception: + # this should remain non blocking we should not block a response returning if calculating usage fails + pass + else: + completion_tokens = 0 + + model_response.created = int(time.time()) + model_response.model = model + usage = Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + setattr(model_response, "usage", usage) + model_response._hidden_params["original_response"] = completion_response + return model_response + + def transform_response( + self, + model: str, + raw_response: httpx.Response, + model_response: ModelResponse, + logging_obj: LoggingClass, + request_data: Dict, + messages: List[AllMessageValues], + optional_params: Dict, + litellm_params: Dict, + encoding: Any, + api_key: Optional[str] = None, + json_mode: Optional[bool] = None, + ) -> ModelResponse: + ## Some servers might return streaming responses even though stream was not set to true. (e.g. 
Baseten) + task = litellm_params.get("task", None) + is_streamed = False + if ( + raw_response.__dict__["headers"].get("Content-Type", "") + == "text/event-stream" + ): + is_streamed = True + + # iterate over the complete streamed response, and return the final answer + if is_streamed: + completion_response = self._convert_streamed_response_to_complete_response( + response=raw_response, + logging_obj=logging_obj, + model=model, + data=request_data, + api_key=api_key, + ) + else: + ## LOGGING + logging_obj.post_call( + input=request_data, + api_key=api_key, + original_response=raw_response.text, + additional_args={"complete_input_dict": request_data}, + ) + ## RESPONSE OBJECT + try: + completion_response = raw_response.json() + if isinstance(completion_response, dict): + completion_response = [completion_response] + except Exception: + raise HuggingFaceError( + message=f"Original Response received: {raw_response.text}", + status_code=raw_response.status_code, + ) + + if isinstance(completion_response, dict) and "error" in completion_response: + raise HuggingFaceError( + message=completion_response["error"], # type: ignore + status_code=raw_response.status_code, + ) + return self.convert_to_model_response_object( + completion_response=completion_response, + model_response=model_response, + task=task if task is not None and task in hf_task_list else None, + optional_params=optional_params, + encoding=encoding, + messages=messages, + model=model, + ) diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py index c83220f358..fcab43901a 100644 --- a/litellm/llms/openai/chat/gpt_transformation.py +++ b/litellm/llms/openai/chat/gpt_transformation.py @@ -402,4 +402,4 @@ class OpenAIChatCompletionStreamingHandler(BaseModelResponseIterator): choices=chunk["choices"], ) except Exception as e: - raise e + raise e \ No newline at end of file diff --git a/litellm/llms/openai_like/chat/transformation.py b/litellm/llms/openai_like/chat/transformation.py index 37cfabdab5..ea9757a855 100644 --- a/litellm/llms/openai_like/chat/transformation.py +++ b/litellm/llms/openai_like/chat/transformation.py @@ -34,7 +34,7 @@ class OpenAILikeChatConfig(OpenAIGPTConfig): return api_base, dynamic_api_key @staticmethod - def _convert_tool_response_to_message( + def _json_mode_convert_tool_response_to_message( message: ChatCompletionAssistantMessage, json_mode: bool ) -> ChatCompletionAssistantMessage: """ @@ -88,8 +88,10 @@ class OpenAILikeChatConfig(OpenAIGPTConfig): if json_mode: for choice in response_json["choices"]: - message = OpenAILikeChatConfig._convert_tool_response_to_message( - choice.get("message"), json_mode + message = ( + OpenAILikeChatConfig._json_mode_convert_tool_response_to_message( + choice.get("message"), json_mode + ) ) choice["message"] = message diff --git a/litellm/llms/openrouter/chat/transformation.py b/litellm/llms/openrouter/chat/transformation.py index 452921f551..77f402a131 100644 --- a/litellm/llms/openrouter/chat/transformation.py +++ b/litellm/llms/openrouter/chat/transformation.py @@ -1,17 +1,18 @@ """ -Support for OpenAI's `/v1/chat/completions` endpoint. +Support for OpenAI's `/v1/chat/completions` endpoint. Calls done in OpenAI/openai.py as OpenRouter is openai-compatible. 
Docs: https://openrouter.ai/docs/parameters """ -from typing import Any, AsyncIterator, Iterator, Optional, Union +from typing import Any, AsyncIterator, Iterator, List, Optional, Union import httpx from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.types.llms.openai import AllMessageValues from litellm.types.llms.openrouter import OpenRouterErrorMessage from litellm.types.utils import ModelResponse, ModelResponseStream @@ -47,6 +48,27 @@ class OpenrouterConfig(OpenAIGPTConfig): ] = extra_body # openai client supports `extra_body` param return mapped_openai_params + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + """ + Transform the overall request to be sent to the API. + + Returns: + dict: The transformed request. Sent as the body of the API call. + """ + extra_body = optional_params.pop("extra_body", {}) + response = super().transform_request( + model, messages, optional_params, litellm_params, headers + ) + response.update(extra_body) + return response + def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/predibase/chat/transformation.py b/litellm/llms/predibase/chat/transformation.py index f1a2163d24..8ef0eea173 100644 --- a/litellm/llms/predibase/chat/transformation.py +++ b/litellm/llms/predibase/chat/transformation.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union from httpx import Headers, Response +from litellm.constants import DEFAULT_MAX_TOKENS from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ModelResponse @@ -27,7 +28,7 @@ class PredibaseConfig(BaseConfig): decoder_input_details: Optional[bool] = None details: bool = True # enables returning logprobs + best of max_new_tokens: int = ( - 256 # openai default - requests hang if max_new_tokens not given + DEFAULT_MAX_TOKENS # openai default - requests hang if max_new_tokens not given ) repetition_penalty: Optional[float] = None return_full_text: Optional[ diff --git a/litellm/llms/replicate/chat/handler.py b/litellm/llms/replicate/chat/handler.py index 7991c61ee3..d954416381 100644 --- a/litellm/llms/replicate/chat/handler.py +++ b/litellm/llms/replicate/chat/handler.py @@ -4,6 +4,7 @@ import time from typing import Callable, List, Union import litellm +from litellm.constants import REPLICATE_POLLING_DELAY_SECONDS from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -28,7 +29,9 @@ def handle_prediction_response_streaming( status = "" while True and (status not in ["succeeded", "failed", "canceled"]): - time.sleep(0.5) # prevent being rate limited by replicate + time.sleep( + REPLICATE_POLLING_DELAY_SECONDS + ) # prevent being rate limited by replicate print_verbose(f"replicate: polling endpoint: {prediction_url}") response = http_client.get(prediction_url, headers=headers) if response.status_code == 200: @@ -77,7 +80,9 @@ async def async_handle_prediction_response_streaming( status = "" while True and (status not in ["succeeded", "failed", "canceled"]): - await asyncio.sleep(0.5) # prevent being rate limited by replicate + await asyncio.sleep( + REPLICATE_POLLING_DELAY_SECONDS + ) # prevent being rate limited by 
replicate print_verbose(f"replicate: polling endpoint: {prediction_url}") response = await http_client.get(prediction_url, headers=headers) if response.status_code == 200: diff --git a/litellm/llms/replicate/chat/transformation.py b/litellm/llms/replicate/chat/transformation.py index d49350dea7..604e6eefe6 100644 --- a/litellm/llms/replicate/chat/transformation.py +++ b/litellm/llms/replicate/chat/transformation.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union import httpx import litellm +from litellm.constants import REPLICATE_MODEL_NAME_WITH_ID_LENGTH from litellm.litellm_core_utils.prompt_templates.common_utils import ( convert_content_list_to_str, ) @@ -221,10 +222,11 @@ class ReplicateConfig(BaseConfig): version_id = self.model_to_version_id(model) request_data: dict = {"input": input_data} - if ":" in version_id and len(version_id) > 64: + if ":" in version_id and len(version_id) > REPLICATE_MODEL_NAME_WITH_ID_LENGTH: model_parts = version_id.split(":") if ( - len(model_parts) > 1 and len(model_parts[1]) == 64 + len(model_parts) > 1 + and len(model_parts[1]) == REPLICATE_MODEL_NAME_WITH_ID_LENGTH ): ## checks if model name has a 64 digit code - e.g. "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3" request_data["version"] = model_parts[1] diff --git a/litellm/llms/together_ai/cost_calculator.py b/litellm/llms/together_ai/cost_calculator.py index d3b0db8b89..a1be097bc8 100644 --- a/litellm/llms/together_ai/cost_calculator.py +++ b/litellm/llms/together_ai/cost_calculator.py @@ -4,6 +4,16 @@ Handles calculating cost for together ai models import re +from litellm.constants import ( + TOGETHER_AI_4_B, + TOGETHER_AI_8_B, + TOGETHER_AI_21_B, + TOGETHER_AI_41_B, + TOGETHER_AI_80_B, + TOGETHER_AI_110_B, + TOGETHER_AI_EMBEDDING_150_M, + TOGETHER_AI_EMBEDDING_350_M, +) from litellm.types.utils import CallTypes @@ -31,17 +41,17 @@ def get_model_params_and_category(model_name, call_type: CallTypes) -> str: else: return model_name # Determine the category based on the number of parameters - if params_billion <= 4.0: + if params_billion <= TOGETHER_AI_4_B: category = "together-ai-up-to-4b" - elif params_billion <= 8.0: + elif params_billion <= TOGETHER_AI_8_B: category = "together-ai-4.1b-8b" - elif params_billion <= 21.0: + elif params_billion <= TOGETHER_AI_21_B: category = "together-ai-8.1b-21b" - elif params_billion <= 41.0: + elif params_billion <= TOGETHER_AI_41_B: category = "together-ai-21.1b-41b" - elif params_billion <= 80.0: + elif params_billion <= TOGETHER_AI_80_B: category = "together-ai-41.1b-80b" - elif params_billion <= 110.0: + elif params_billion <= TOGETHER_AI_110_B: category = "together-ai-81.1b-110b" if category is not None: return category @@ -69,9 +79,9 @@ def get_model_params_and_category_embeddings(model_name) -> str: else: return model_name # Determine the category based on the number of parameters - if params_million <= 150: + if params_million <= TOGETHER_AI_EMBEDDING_150_M: category = "together-ai-embedding-up-to-150m" - elif params_million <= 350: + elif params_million <= TOGETHER_AI_EMBEDDING_350_M: category = "together-ai-embedding-151m-to-350m" if category is not None: return category diff --git a/litellm/llms/triton/completion/transformation.py b/litellm/llms/triton/completion/transformation.py index db0add6f35..49126917f2 100644 --- a/litellm/llms/triton/completion/transformation.py +++ b/litellm/llms/triton/completion/transformation.py @@ -7,6 +7,7 @@ from typing import Any, AsyncIterator, 
Dict, Iterator, List, Literal, Optional, from httpx import Headers, Response +from litellm.constants import DEFAULT_MAX_TOKENS_FOR_TRITON from litellm.litellm_core_utils.prompt_templates.factory import prompt_factory from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator from litellm.llms.base_llm.chat.transformation import ( @@ -196,7 +197,9 @@ class TritonGenerateConfig(TritonConfig): data_for_triton: Dict[str, Any] = { "text_input": prompt_factory(model=model, messages=messages), "parameters": { - "max_tokens": int(optional_params.get("max_tokens", 2000)), + "max_tokens": int( + optional_params.get("max_tokens", DEFAULT_MAX_TOKENS_FOR_TRITON) + ), "bad_words": [""], "stop_words": [""], }, diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index 8067d51c87..d70fa1a089 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -224,17 +224,12 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 if not file_id: continue - mime_type = format or _get_image_mime_type_from_url(file_id) - - if mime_type is not None: - _part = PartType( - file_data=FileDataType( - file_uri=file_id, - mime_type=mime_type, - ) + try: + _part = _process_gemini_image( + image_url=file_id, format=format ) _parts.append(_part) - else: + except Exception: raise Exception( "Unable to determine mime type for file_id: {}, set this explicitly using message[{}].content[{}].file.format".format( file_id, msg_i, element_idx diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index 860dec9eb2..d38c24bb2e 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -208,6 +208,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): "seed", "logprobs", "top_logprobs", # Added this to list of supported openAI params + "modalities", ] def map_tool_choice_values( @@ -312,6 +313,30 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): old_schema = _build_vertex_schema(parameters=old_schema) return old_schema + def apply_response_schema_transformation(self, value: dict, optional_params: dict): + # remove 'additionalProperties' from json schema + value = _remove_additional_properties(value) + # remove 'strict' from json schema + value = _remove_strict_from_schema(value) + if value["type"] == "json_object": + optional_params["response_mime_type"] = "application/json" + elif value["type"] == "text": + optional_params["response_mime_type"] = "text/plain" + if "response_schema" in value: + optional_params["response_mime_type"] = "application/json" + optional_params["response_schema"] = value["response_schema"] + elif value["type"] == "json_schema": # type: ignore + if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore + optional_params["response_mime_type"] = "application/json" + optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore + + if "response_schema" in optional_params and isinstance( + optional_params["response_schema"], dict + ): + optional_params["response_schema"] = self._map_response_schema( + value=optional_params["response_schema"] + ) + def map_openai_params( self, non_default_params: Dict, @@ -322,58 +347,39 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): for param, value in non_default_params.items(): 
if param == "temperature": optional_params["temperature"] = value - if param == "top_p": + elif param == "top_p": optional_params["top_p"] = value - if ( + elif ( param == "stream" and value is True ): # sending stream = False, can cause it to get passed unchecked and raise issues optional_params["stream"] = value - if param == "n": + elif param == "n": optional_params["candidate_count"] = value - if param == "stop": + elif param == "stop": if isinstance(value, str): optional_params["stop_sequences"] = [value] elif isinstance(value, list): optional_params["stop_sequences"] = value - if param == "max_tokens" or param == "max_completion_tokens": + elif param == "max_tokens" or param == "max_completion_tokens": optional_params["max_output_tokens"] = value - if param == "response_format" and isinstance(value, dict): # type: ignore - # remove 'additionalProperties' from json schema - value = _remove_additional_properties(value) - # remove 'strict' from json schema - value = _remove_strict_from_schema(value) - if value["type"] == "json_object": - optional_params["response_mime_type"] = "application/json" - elif value["type"] == "text": - optional_params["response_mime_type"] = "text/plain" - if "response_schema" in value: - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["response_schema"] - elif value["type"] == "json_schema": # type: ignore - if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore - - if "response_schema" in optional_params and isinstance( - optional_params["response_schema"], dict - ): - optional_params["response_schema"] = self._map_response_schema( - value=optional_params["response_schema"] - ) - if param == "frequency_penalty": + elif param == "response_format" and isinstance(value, dict): # type: ignore + self.apply_response_schema_transformation( + value=value, optional_params=optional_params + ) + elif param == "frequency_penalty": optional_params["frequency_penalty"] = value - if param == "presence_penalty": + elif param == "presence_penalty": optional_params["presence_penalty"] = value - if param == "logprobs": + elif param == "logprobs": optional_params["responseLogprobs"] = value - if param == "top_logprobs": + elif param == "top_logprobs": optional_params["logprobs"] = value - if (param == "tools" or param == "functions") and isinstance(value, list): + elif (param == "tools" or param == "functions") and isinstance(value, list): optional_params["tools"] = self._map_function(value=value) optional_params["litellm_param_is_function_call"] = ( True if param == "functions" else False ) - if param == "tool_choice" and ( + elif param == "tool_choice" and ( isinstance(value, str) or isinstance(value, dict) ): _tool_choice_value = self.map_tool_choice_values( @@ -381,8 +387,18 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): ) if _tool_choice_value is not None: optional_params["tool_choice"] = _tool_choice_value - if param == "seed": + elif param == "seed": optional_params["seed"] = value + elif param == "modalities" and isinstance(value, list): + response_modalities = [] + for modality in value: + if modality == "text": + response_modalities.append("TEXT") + elif modality == "image": + response_modalities.append("IMAGE") + else: + response_modalities.append("MODALITY_UNSPECIFIED") + optional_params["responseModalities"] = response_modalities 
if litellm.vertex_ai_safety_settings is not None: optional_params["safety_settings"] = litellm.vertex_ai_safety_settings @@ -493,6 +509,11 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): for part in parts: if "text" in part: _content_str += part["text"] + elif "inlineData" in part: # base64 encoded image + _content_str += "data:{};base64,{}".format( + part["inlineData"]["mimeType"], part["inlineData"]["data"] + ) + if _content_str: return _content_str return None @@ -676,6 +697,70 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): return usage + def _process_candidates(self, _candidates, model_response, litellm_params): + """Helper method to process candidates and extract metadata""" + grounding_metadata: List[dict] = [] + safety_ratings: List = [] + citation_metadata: List = [] + chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"} + chat_completion_logprobs: Optional[ChoiceLogprobs] = None + tools: Optional[List[ChatCompletionToolCallChunk]] = [] + functions: Optional[ChatCompletionToolCallFunctionChunk] = None + + for idx, candidate in enumerate(_candidates): + if "content" not in candidate: + continue + + if "groundingMetadata" in candidate: + grounding_metadata.append(candidate["groundingMetadata"]) # type: ignore + + if "safetyRatings" in candidate: + safety_ratings.append(candidate["safetyRatings"]) + + if "citationMetadata" in candidate: + citation_metadata.append(candidate["citationMetadata"]) + + if "parts" in candidate["content"]: + chat_completion_message[ + "content" + ] = VertexGeminiConfig().get_assistant_content_message( + parts=candidate["content"]["parts"] + ) + + functions, tools = self._transform_parts( + parts=candidate["content"]["parts"], + index=candidate.get("index", idx), + is_function_call=litellm_params.get( + "litellm_param_is_function_call" + ), + ) + + if "logprobsResult" in candidate: + chat_completion_logprobs = self._transform_logprobs( + logprobs_result=candidate["logprobsResult"] + ) + # Handle avgLogprobs for Gemini Flash 2.0 + elif "avgLogprobs" in candidate: + chat_completion_logprobs = candidate["avgLogprobs"] + + if tools: + chat_completion_message["tool_calls"] = tools + + if functions is not None: + chat_completion_message["function_call"] = functions + + choice = litellm.Choices( + finish_reason=candidate.get("finishReason", "stop"), + index=candidate.get("index", idx), + message=chat_completion_message, # type: ignore + logprobs=chat_completion_logprobs, + enhancements=None, + ) + + model_response.choices.append(choice) + + return grounding_metadata, safety_ratings, citation_metadata + def transform_response( self, model: str, @@ -737,81 +822,28 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): completion_response=completion_response, ) - model_response.choices = [] # type: ignore + model_response.choices = [] try: - ## CHECK IF GROUNDING METADATA IN REQUEST - grounding_metadata: List[dict] = [] - safety_ratings: List = [] - citation_metadata: List = [] - ## GET TEXT ## - chat_completion_message: ChatCompletionResponseMessage = { - "role": "assistant" - } - chat_completion_logprobs: Optional[ChoiceLogprobs] = None - tools: Optional[List[ChatCompletionToolCallChunk]] = [] - functions: Optional[ChatCompletionToolCallFunctionChunk] = None + grounding_metadata, safety_ratings, citation_metadata = [], [], [] if _candidates: - for idx, candidate in enumerate(_candidates): - if "content" not in candidate: - continue - - if "groundingMetadata" in candidate: - 
grounding_metadata.append(candidate["groundingMetadata"]) # type: ignore - - if "safetyRatings" in candidate: - safety_ratings.append(candidate["safetyRatings"]) - - if "citationMetadata" in candidate: - citation_metadata.append(candidate["citationMetadata"]) - if "parts" in candidate["content"]: - chat_completion_message[ - "content" - ] = VertexGeminiConfig().get_assistant_content_message( - parts=candidate["content"]["parts"] - ) - - functions, tools = self._transform_parts( - parts=candidate["content"]["parts"], - index=candidate.get("index", idx), - is_function_call=litellm_params.get( - "litellm_param_is_function_call" - ), - ) - - if "logprobsResult" in candidate: - chat_completion_logprobs = self._transform_logprobs( - logprobs_result=candidate["logprobsResult"] - ) - - if tools: - chat_completion_message["tool_calls"] = tools - - if functions is not None: - chat_completion_message["function_call"] = functions - choice = litellm.Choices( - finish_reason=candidate.get("finishReason", "stop"), - index=candidate.get("index", idx), - message=chat_completion_message, # type: ignore - logprobs=chat_completion_logprobs, - enhancements=None, - ) - - model_response.choices.append(choice) + ( + grounding_metadata, + safety_ratings, + citation_metadata, + ) = self._process_candidates( + _candidates, model_response, litellm_params + ) usage = self._calculate_usage(completion_response=completion_response) - setattr(model_response, "usage", usage) - ## ADD GROUNDING METADATA ## + ## ADD METADATA TO RESPONSE ## setattr(model_response, "vertex_ai_grounding_metadata", grounding_metadata) model_response._hidden_params[ "vertex_ai_grounding_metadata" - ] = ( # older approach - maintaining to prevent regressions - grounding_metadata - ) + ] = grounding_metadata - ## ADD SAFETY RATINGS ## setattr(model_response, "vertex_ai_safety_results", safety_ratings) model_response._hidden_params[ "vertex_ai_safety_results" @@ -1029,7 +1061,7 @@ class VertexLLM(VertexBase): input=messages, api_key="", additional_args={ - "complete_input_dict": data, + "complete_input_dict": request_body, "api_base": api_base, "headers": headers, }, diff --git a/litellm/llms/vertex_ai/vertex_llm_base.py b/litellm/llms/vertex_ai/vertex_llm_base.py index 8286cb515f..994e46b50b 100644 --- a/litellm/llms/vertex_ai/vertex_llm_base.py +++ b/litellm/llms/vertex_ai/vertex_llm_base.py @@ -6,7 +6,7 @@ Handles Authentication and generating request urls for Vertex AI and Google AI S import json import os -from typing import TYPE_CHECKING, Any, Literal, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple from litellm._logging import verbose_logger from litellm.litellm_core_utils.asyncify import asyncify @@ -28,6 +28,10 @@ class VertexBase(BaseLLM): self.access_token: Optional[str] = None self.refresh_token: Optional[str] = None self._credentials: Optional[GoogleCredentialsObject] = None + self._credentials_project_mapping: Dict[ + Tuple[Optional[VERTEX_CREDENTIALS_TYPES], Optional[str]], + GoogleCredentialsObject, + ] = {} self.project_id: Optional[str] = None self.async_handler: Optional[AsyncHTTPHandler] = None @@ -128,32 +132,11 @@ class VertexBase(BaseLLM): """ if custom_llm_provider == "gemini": return "", "" - if self.access_token is not None: - if project_id is not None: - return self.access_token, project_id - elif self.project_id is not None: - return self.access_token, self.project_id - - if not self._credentials: - self._credentials, cred_project_id = self.load_auth( - credentials=credentials, 
project_id=project_id - ) - if not self.project_id: - self.project_id = project_id or cred_project_id else: - if self._credentials.expired or not self._credentials.token: - self.refresh_auth(self._credentials) - - if not self.project_id: - self.project_id = self._credentials.quota_project_id - - if not self.project_id: - raise ValueError("Could not resolve project_id") - - if not self._credentials or not self._credentials.token: - raise RuntimeError("Could not resolve API token from the environment") - - return self._credentials.token, project_id or self.project_id + return self.get_access_token( + credentials=credentials, + project_id=project_id, + ) def is_using_v1beta1_features(self, optional_params: dict) -> bool: """ @@ -259,6 +242,101 @@ class VertexBase(BaseLLM): url=url, ) + def get_access_token( + self, + credentials: Optional[VERTEX_CREDENTIALS_TYPES], + project_id: Optional[str], + ) -> Tuple[str, str]: + """ + Get access token and project id + + 1. Check if credentials are already in self._credentials_project_mapping + 2. If not, load credentials and add to self._credentials_project_mapping + 3. Check if loaded credentials have expired + 4. If expired, refresh credentials + 5. Return access token and project id + """ + + # Convert dict credentials to string for caching + cache_credentials = ( + json.dumps(credentials) if isinstance(credentials, dict) else credentials + ) + credential_cache_key = (cache_credentials, project_id) + _credentials: Optional[GoogleCredentialsObject] = None + + verbose_logger.debug( + f"Checking cached credentials for project_id: {project_id}" + ) + + if credential_cache_key in self._credentials_project_mapping: + verbose_logger.debug( + f"Cached credentials found for project_id: {project_id}." + ) + _credentials = self._credentials_project_mapping[credential_cache_key] + verbose_logger.debug("Using cached credentials") + credential_project_id = _credentials.quota_project_id or getattr( + _credentials, "project_id", None + ) + + else: + verbose_logger.debug( + f"Credential cache key not found for project_id: {project_id}, loading new credentials" + ) + + try: + _credentials, credential_project_id = self.load_auth( + credentials=credentials, project_id=project_id + ) + except Exception as e: + verbose_logger.exception( + "Failed to load vertex credentials. Check to see if credentials containing partial/invalid information." + ) + raise e + + if _credentials is None: + raise ValueError( + "Could not resolve credentials - either dynamically or from environment, for project_id: {}".format( + project_id + ) + ) + + self._credentials_project_mapping[credential_cache_key] = _credentials + + ## VALIDATE CREDENTIALS + verbose_logger.debug(f"Validating credentials for project_id: {project_id}") + if ( + project_id is not None + and credential_project_id + and credential_project_id != project_id + ): + raise ValueError( + "Could not resolve project_id. Credential project_id: {} does not match requested project_id: {}".format( + _credentials.quota_project_id, project_id + ) + ) + elif ( + project_id is None + and credential_project_id is not None + and isinstance(credential_project_id, str) + ): + project_id = credential_project_id + + if _credentials.expired: + self.refresh_auth(_credentials) + + ## VALIDATION STEP + if _credentials.token is None or not isinstance(_credentials.token, str): + raise ValueError( + "Could not resolve credentials token. 
Got None or non-string token - {}".format( + _credentials.token + ) + ) + + if project_id is None: + raise ValueError("Could not resolve project_id") + + return _credentials.token, project_id + async def _ensure_access_token_async( self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], @@ -272,38 +350,14 @@ class VertexBase(BaseLLM): """ if custom_llm_provider == "gemini": return "", "" - if self.access_token is not None: - if project_id is not None: - return self.access_token, project_id - elif self.project_id is not None: - return self.access_token, self.project_id - - if not self._credentials: - try: - self._credentials, cred_project_id = await asyncify(self.load_auth)( - credentials=credentials, project_id=project_id - ) - except Exception: - verbose_logger.exception( - "Failed to load vertex credentials. Check to see if credentials containing partial/invalid information." - ) - raise - if not self.project_id: - self.project_id = project_id or cred_project_id else: - if self._credentials.expired or not self._credentials.token: - await asyncify(self.refresh_auth)(self._credentials) - - if not self.project_id: - self.project_id = self._credentials.quota_project_id - - if not self.project_id: - raise ValueError("Could not resolve project_id") - - if not self._credentials or not self._credentials.token: - raise RuntimeError("Could not resolve API token from the environment") - - return self._credentials.token, project_id or self.project_id + try: + return await asyncify(self.get_access_token)( + credentials=credentials, + project_id=project_id, + ) + except Exception as e: + raise e def set_headers( self, auth_header: Optional[str], extra_headers: Optional[dict] diff --git a/litellm/llms/xai/chat/transformation.py b/litellm/llms/xai/chat/transformation.py index 734c6eb2e0..614509020e 100644 --- a/litellm/llms/xai/chat/transformation.py +++ b/litellm/llms/xai/chat/transformation.py @@ -1,6 +1,10 @@ -from typing import Optional, Tuple +from typing import List, Optional, Tuple +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + strip_name_from_messages, +) from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllMessageValues from ...openai.chat.gpt_transformation import OpenAIGPTConfig @@ -51,3 +55,21 @@ class XAIChatConfig(OpenAIGPTConfig): if value is not None: optional_params[param] = value return optional_params + + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + """ + Handle https://github.com/BerriAI/litellm/issues/9720 + + Filter out 'name' from messages + """ + messages = strip_name_from_messages(messages) + return super().transform_request( + model, messages, optional_params, litellm_params, headers + ) diff --git a/litellm/main.py b/litellm/main.py index f69454aaad..cd7d255e21 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -51,6 +51,10 @@ from litellm import ( # type: ignore get_litellm_params, get_optional_params, ) +from litellm.constants import ( + DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT, + DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT, +) from litellm.exceptions import LiteLLMUnknownProvider from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_for_health_check @@ -106,7 +110,10 @@ from .litellm_core_utils.fallback_utils import ( async_completion_with_fallbacks, completion_with_fallbacks, ) -from 
.litellm_core_utils.prompt_templates.common_utils import get_completion_messages +from .litellm_core_utils.prompt_templates.common_utils import ( + get_completion_messages, + update_messages_with_model_file_ids, +) from .litellm_core_utils.prompt_templates.factory import ( custom_prompt, function_call_prompt, @@ -131,11 +138,10 @@ from .llms.cohere.embed import handler as cohere_embed from .llms.custom_httpx.aiohttp_handler import BaseLLMAIOHTTPHandler from .llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler from .llms.custom_llm import CustomLLM, custom_chat_llm_router -from .llms.databricks.chat.handler import DatabricksChatCompletion from .llms.databricks.embed.handler import DatabricksEmbeddingHandler from .llms.deprecated_providers import aleph_alpha, palm from .llms.groq.chat.handler import GroqChatCompletion -from .llms.huggingface.chat.handler import Huggingface +from .llms.huggingface.embedding.handler import HuggingFaceEmbedding from .llms.nlp_cloud.chat.handler import completion as nlp_cloud_chat_completion from .llms.ollama.completion import handler as ollama from .llms.oobabooga.chat import oobabooga @@ -208,7 +214,6 @@ openai_chat_completions = OpenAIChatCompletion() openai_text_completions = OpenAITextCompletion() openai_audio_transcriptions = OpenAIAudioTranscription() openai_image_variations = OpenAIImageVariationsHandler() -databricks_chat_completions = DatabricksChatCompletion() groq_chat_completions = GroqChatCompletion() azure_ai_embedding = AzureAIEmbedding() anthropic_chat_completions = AnthropicChatCompletion() @@ -216,7 +221,7 @@ azure_chat_completions = AzureChatCompletion() azure_o1_chat_completions = AzureOpenAIO1ChatCompletion() azure_text_completions = AzureTextCompletion() azure_audio_transcriptions = AzureAudioTranscription() -huggingface = Huggingface() +huggingface_embed = HuggingFaceEmbedding() predibase_chat_completions = PredibaseChatCompletion() codestral_text_completions = CodestralTextCompletion() bedrock_converse_chat_completion = BedrockConverseLLM() @@ -445,7 +450,7 @@ async def acompletion( fallbacks = fallbacks or litellm.model_fallbacks if fallbacks is not None: response = await async_completion_with_fallbacks( - **completion_kwargs, kwargs={"fallbacks": fallbacks} + **completion_kwargs, kwargs={"fallbacks": fallbacks, **kwargs} ) if response is None: raise Exception( @@ -740,7 +745,12 @@ def mock_completion( setattr( model_response, "usage", - Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30), + Usage( + prompt_tokens=DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT, + completion_tokens=DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT, + total_tokens=DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT + + DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT, + ), ) try: @@ -944,7 +954,6 @@ def completion( # type: ignore # noqa: PLR0915 non_default_params = get_non_default_completion_params(kwargs=kwargs) litellm_params = {} # used to prevent unbound var errors ## PROMPT MANAGEMENT HOOKS ## - if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None: ( model, @@ -1059,6 +1068,15 @@ def completion( # type: ignore # noqa: PLR0915 if eos_token: custom_prompt_dict[model]["eos_token"] = eos_token + if kwargs.get("model_file_id_mapping"): + messages = update_messages_with_model_file_ids( + messages=messages, + model_id=kwargs.get("model_info", {}).get("id", None), + model_file_id_mapping=cast( + Dict[str, Dict[str, str]], kwargs.get("model_file_id_mapping") + ), + ) + provider_config: Optional[BaseConfig] = None if custom_llm_provider 
is not None and custom_llm_provider in [ provider.value for provider in LlmProviders @@ -1115,6 +1133,7 @@ def completion( # type: ignore # noqa: PLR0915 messages=messages, reasoning_effort=reasoning_effort, thinking=thinking, + allowed_openai_params=kwargs.get("allowed_openai_params"), **non_default_params, ) @@ -2122,7 +2141,6 @@ def completion( # type: ignore # noqa: PLR0915 response = model_response elif custom_llm_provider == "huggingface": - custom_llm_provider = "huggingface" huggingface_key = ( api_key or litellm.huggingface_key @@ -2131,40 +2149,23 @@ def completion( # type: ignore # noqa: PLR0915 or litellm.api_key ) hf_headers = headers or litellm.headers - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - model_response = huggingface.completion( + response = base_llm_http_handler.completion( model=model, messages=messages, - api_base=api_base, # type: ignore - headers=hf_headers or {}, + headers=hf_headers, model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, api_key=huggingface_key, + api_base=api_base, acompletion=acompletion, logging_obj=logging, - custom_prompt_dict=custom_prompt_dict, + optional_params=optional_params, + litellm_params=litellm_params, timeout=timeout, # type: ignore client=client, + custom_llm_provider=custom_llm_provider, + encoding=encoding, + stream=stream, ) - if ( - "stream" in optional_params - and optional_params["stream"] is True - and acompletion is False - ): - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="huggingface", - logging_obj=logging, - ) - return response - response = model_response elif custom_llm_provider == "oobabooga": custom_llm_provider = "oobabooga" model_response = oobabooga.completion( @@ -2209,24 +2210,22 @@ def completion( # type: ignore # noqa: PLR0915 ## COMPLETION CALL try: - response = databricks_chat_completions.completion( + response = base_llm_http_handler.completion( model=model, + stream=stream, messages=messages, - headers=headers, - model_response=model_response, - print_verbose=print_verbose, - api_key=api_key, - api_base=api_base, acompletion=acompletion, - logging_obj=logging, + api_base=api_base, + model_response=model_response, optional_params=optional_params, litellm_params=litellm_params, - logger_fn=logger_fn, - timeout=timeout, # type: ignore - custom_prompt_dict=custom_prompt_dict, - client=client, # pass AsyncOpenAI, OpenAI client - encoding=encoding, custom_llm_provider="databricks", + timeout=timeout, + headers=headers, + encoding=encoding, + api_key=api_key, + logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements + client=client, ) except Exception as e: ## LOGGING - log the original exception returned @@ -3066,7 +3065,7 @@ def completion( # type: ignore # noqa: PLR0915 "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, - "top_k": kwargs.get("top_k", 40), + "top_k": kwargs.get("top_k"), }, }, ) @@ -3606,7 +3605,7 @@ def embedding( # noqa: PLR0915 or get_secret("HUGGINGFACE_API_KEY") or litellm.api_key ) # type: ignore - response = huggingface.embedding( + response = huggingface_embed.embedding( model=model, input=input, encoding=encoding, # type: ignore @@ -5789,6 +5788,19 @@ def stream_chunk_builder( # noqa: PLR0915 "content" ] = processor.get_combined_content(content_chunks) + reasoning_chunks 
= [ + chunk + for chunk in chunks + if len(chunk["choices"]) > 0 + and "reasoning_content" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["reasoning_content"] is not None + ] + + if len(reasoning_chunks) > 0: + response["choices"][0]["message"][ + "reasoning_content" + ] = processor.get_combined_reasoning_content(reasoning_chunks) + audio_chunks = [ chunk for chunk in chunks @@ -5803,11 +5815,14 @@ def stream_chunk_builder( # noqa: PLR0915 completion_output = get_content_from_model_response(response) + reasoning_tokens = processor.count_reasoning_tokens(response) + usage = processor.calculate_usage( chunks=chunks, model=model, completion_output=completion_output, messages=messages, + reasoning_tokens=reasoning_tokens, ) setattr(response, "usage", usage) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 28f8acd21c..e345815fb2 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -88,6 +88,24 @@ "search_context_size_high": 0.050 } }, + "watsonx/ibm/granite-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.0002, + "output_cost_per_token": 0.0002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_parallel_function_calling": false, + "supports_vision": false, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true + }, "gpt-4o-search-preview-2025-03-11": { "max_tokens": 16384, "max_input_tokens": 128000, @@ -3303,6 +3321,24 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "groq/whisper-large-v3": { + "mode": "audio_transcription", + "input_cost_per_second": 0.00003083, + "output_cost_per_second": 0, + "litellm_provider": "groq" + }, + "groq/whisper-large-v3-turbo": { + "mode": "audio_transcription", + "input_cost_per_second": 0.00001111, + "output_cost_per_second": 0, + "litellm_provider": "groq" + }, + "groq/distil-whisper-large-v3-en": { + "mode": "audio_transcription", + "input_cost_per_second": 0.00000556, + "output_cost_per_second": 0, + "litellm_provider": "groq" + }, "cerebras/llama3.1-8b": { "max_tokens": 128000, "max_input_tokens": 128000, @@ -4650,6 +4686,31 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, + "gemini-2.0-flash": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_audio_token": 0.0000007, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000004, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "supports_audio_input": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supports_tool_choice": true, + "source": "https://ai.google.dev/pricing#2_0flash" + }, "gemini-2.0-flash-lite": { "max_input_tokens": 1048576, "max_output_tokens": 8192, @@ -4786,6 +4847,33 @@ "supports_tool_choice": true, 
"source": "https://ai.google.dev/pricing#2_0flash" }, + "gemini/gemini-2.5-pro-preview-03-25": { + "max_tokens": 65536, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_audio_token": 0.0000007, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.0000010, + "output_cost_per_token_above_128k_tokens": 0.000015, + "litellm_provider": "gemini", + "mode": "chat", + "rpm": 10000, + "tpm": 10000000, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": false, + "supports_tool_choice": true, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" + }, "gemini/gemini-2.0-flash-exp": { "max_tokens": 8192, "max_input_tokens": 1048576, @@ -6604,6 +6692,14 @@ "mode": "chat", "supports_tool_choice": true }, + "mistralai/mistral-small-3.1-24b-instruct": { + "max_tokens": 32000, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000003, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_tool_choice": true + }, "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { "max_tokens": 32769, "input_cost_per_token": 0.0000005, @@ -6732,12 +6828,38 @@ "supports_vision": false, "supports_tool_choice": true }, + "openrouter/openai/o3-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_tool_choice": true + }, + "openrouter/openai/o3-mini-high": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_tool_choice": true + }, "openrouter/openai/gpt-4o": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 0.000005, - "output_cost_per_token": 0.000015, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, diff --git a/litellm/proxy/_experimental/out/_next/static/Yb50LG5p7c9QpG54GIoFV/_buildManifest.js b/litellm/proxy/_experimental/out/_next/static/6JTLlefcvwIDKPU9VXW-e/_buildManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/Yb50LG5p7c9QpG54GIoFV/_buildManifest.js rename to litellm/proxy/_experimental/out/_next/static/6JTLlefcvwIDKPU9VXW-e/_buildManifest.js diff --git a/litellm/proxy/_experimental/out/_next/static/Yb50LG5p7c9QpG54GIoFV/_ssgManifest.js b/litellm/proxy/_experimental/out/_next/static/6JTLlefcvwIDKPU9VXW-e/_ssgManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/Yb50LG5p7c9QpG54GIoFV/_ssgManifest.js rename to litellm/proxy/_experimental/out/_next/static/6JTLlefcvwIDKPU9VXW-e/_ssgManifest.js diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/250-938b16708aae1136.js 
b/litellm/proxy/_experimental/out/_next/static/chunks/250-938b16708aae1136.js new file mode 100644 index 0000000000..5645a08f11 --- /dev/null +++ b/litellm/proxy/_experimental/out/_next/static/chunks/250-938b16708aae1136.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[250],{19250:function(e,t,o){o.d(t,{$I:function(){return q},AZ:function(){return L},Au:function(){return eu},BL:function(){return eR},Br:function(){return F},E9:function(){return eU},EG:function(){return eD},EY:function(){return eq},Eb:function(){return C},FC:function(){return en},Gh:function(){return eF},H1:function(){return v},H2:function(){return n},Hx:function(){return ey},I1:function(){return j},It:function(){return x},J$:function(){return ee},K8:function(){return d},K_:function(){return eH},LY:function(){return eG},Lp:function(){return eO},N3:function(){return eE},N8:function(){return Q},NL:function(){return eY},NV:function(){return f},Nc:function(){return eb},O3:function(){return eI},OD:function(){return em},OU:function(){return el},Of:function(){return b},Og:function(){return y},Ov:function(){return E},PT:function(){return D},Qg:function(){return ej},RQ:function(){return _},Rg:function(){return K},Sb:function(){return ev},So:function(){return Y},Tj:function(){return eX},VA:function(){return G},Vt:function(){return eL},W_:function(){return I},X:function(){return et},XO:function(){return k},Xd:function(){return eg},Xm:function(){return S},YU:function(){return ez},Zr:function(){return m},a6:function(){return B},ao:function(){return eZ},b1:function(){return es},cq:function(){return A},cu:function(){return ex},eH:function(){return H},eZ:function(){return eN},fP:function(){return $},g:function(){return eK},gX:function(){return eC},h3:function(){return ea},hT:function(){return e_},hy:function(){return u},ix:function(){return M},j2:function(){return eo},jA:function(){return eM},jE:function(){return eJ},kK:function(){return p},kn:function(){return Z},lP:function(){return h},lU:function(){return e0},lg:function(){return ek},mR:function(){return W},m_:function(){return R},mp:function(){return eV},n$:function(){return ew},nd:function(){return eW},o6:function(){return X},oC:function(){return eT},pf:function(){return eA},qI:function(){return g},qk:function(){return eQ},qm:function(){return w},r6:function(){return O},rs:function(){return N},s0:function(){return z},sN:function(){return eB},t$:function(){return P},t0:function(){return ef},t3:function(){return e$},tB:function(){return e1},tN:function(){return ec},u5:function(){return er},um:function(){return eS},v9:function(){return ep},vh:function(){return eP},wX:function(){return T},wd:function(){return ei},xA:function(){return eh},xX:function(){return J},zg:function(){return ed}});var r=o(20347),a=o(41021);let n=null;console.log=function(){};let c=0,s=e=>new Promise(t=>setTimeout(t,e)),l=async e=>{let t=Date.now();t-c>6e4?(e.includes("Authentication Error - Expired Key")&&(a.ZP.info("UI Session Expired. 
Logging out."),c=t,await s(3e3),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;",window.location.href="/"),c=t):console.log("Error suppressed to prevent spam:",e)},i="Authorization";function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"Authorization";console.log("setGlobalLitellmHeaderName: ".concat(e)),i=e}let h=async()=>{let e=n?"".concat(n,"/openapi.json"):"/openapi.json",t=await fetch(e);return await t.json()},w=async e=>{try{let t=n?"".concat(n,"/get/litellm_model_cost_map"):"/get/litellm_model_cost_map",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}}),r=await o.json();return console.log("received litellm model cost data: ".concat(r)),r}catch(e){throw console.error("Failed to get model cost map:",e),e}},p=async(e,t)=>{try{let o=n?"".concat(n,"/model/new"):"/model/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text()||"Network response was not ok";throw a.ZP.error(e),Error(e)}let c=await r.json();return console.log("API Response:",c),a.ZP.destroy(),a.ZP.success("Model ".concat(t.model_name," created successfully"),2),c}catch(e){throw console.error("Failed to create key:",e),e}},u=async e=>{try{let t=n?"".concat(n,"/model/settings"):"/model/settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){console.error("Failed to get model settings:",e)}},y=async(e,t)=>{console.log("model_id in model delete call: ".concat(t));try{let o=n?"".concat(n,"/model/delete"):"/model/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,t)=>{if(console.log("budget_id in budget delete call: ".concat(t)),null!=e)try{let o=n?"".concat(n,"/budget/delete"):"/budget/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},m=async(e,t)=>{try{console.log("Form Values in budgetCreateCall:",t),console.log("Form Values after check:",t);let o=n?"".concat(n,"/budget/new"):"/budget/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,t)=>{try{console.log("Form Values in budgetUpdateCall:",t),console.log("Form Values after check:",t);let o=n?"".concat(n,"/budget/update"):"/budget/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw 
l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,t)=>{try{let o=n?"".concat(n,"/invitation/new"):"/invitation/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},_=async e=>{try{let t=n?"".concat(n,"/alerting/settings"):"/alerting/settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},T=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=n?"".concat(n,"/key/generate"):"/key/generate",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.auto_create_key=!1,o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=n?"".concat(n,"/user/new"):"/user/new",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,t)=>{try{let o=n?"".concat(n,"/key/delete"):"/key/delete";console.log("in keyDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to create key:",e),e}},C=async(e,t)=>{try{let o=n?"".concat(n,"/user/delete"):"/user/delete";console.log("in userDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_ids:t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete user(s):",e),e}},N=async(e,t)=>{try{let 
o=n?"".concat(n,"/team/delete"):"/team/delete";console.log("in teamDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete key:",e),e}},b=async function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null,r=arguments.length>3&&void 0!==arguments[3]?arguments[3]:null;try{let a=n?"".concat(n,"/user/list"):"/user/list";console.log("in userListCall");let c=new URLSearchParams;if(t&&t.length>0){let e=t.join(",");c.append("user_ids",e)}o&&c.append("page",o.toString()),r&&c.append("page_size",r.toString());let s=c.toString();s&&(a+="?".concat(s));let d=await fetch(a,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log("/user/list API Response:",h),h}catch(e){throw console.error("Failed to create key:",e),e}},F=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,c=arguments.length>5?arguments[5]:void 0;try{let s;if(r){s=n?"".concat(n,"/user/list"):"/user/list";let e=new URLSearchParams;null!=a&&e.append("page",a.toString()),null!=c&&e.append("page_size",c.toString()),s+="?".concat(e.toString())}else s=n?"".concat(n,"/user/info"):"/user/info","Admin"===o||"Admin Viewer"===o||t&&(s+="?user_id=".concat(t));console.log("Requesting user data from:",s);let d=await fetch(s,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log("API Response:",h),h}catch(e){throw console.error("Failed to fetch user data:",e),e}},S=async(e,t)=>{try{let o=n?"".concat(n,"/team/info"):"/team/info";t&&(o="".concat(o,"?team_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},x=async function(e,t){let o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;try{let r=n?"".concat(n,"/team/list"):"/team/list";console.log("in teamInfoCall");let a=new URLSearchParams;o&&a.append("user_id",o.toString()),t&&a.append("organization_id",t.toString());let c=a.toString();c&&(r+="?".concat(c));let s=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}let d=await s.json();return console.log("/team/list API Response:",d),d}catch(e){throw console.error("Failed to create key:",e),e}},B=async e=>{try{let t=n?"".concat(n,"/team/available"):"/team/available";console.log("in availableTeamListCall");let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("/team/available_teams API Response:",r),r}catch(e){throw e}},O=async e=>{try{let 
t=n?"".concat(n,"/organization/list"):"/organization/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,t)=>{try{let o=n?"".concat(n,"/organization/info"):"/organization/info";t&&(o="".concat(o,"?organization_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,t)=>{try{if(console.log("Form Values in organizationCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw console.error("Failed to parse metadata:",e),Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/organization/new"):"/organization/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},G=async(e,t)=>{try{console.log("Form Values in organizationUpdateCall:",t);let o=n?"".concat(n,"/organization/update"):"/organization/update",r=await fetch(o,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update Team Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},A=async(e,t)=>{try{let o=n?"".concat(n,"/organization/delete"):"/organization/delete",r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_ids:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Error deleting organization: ".concat(e))}return await r.json()}catch(e){throw console.error("Failed to delete organization:",e),e}},J=async(e,t,o)=>{try{let r=n?"".concat(n,"/user/daily/activity"):"/user/daily/activity",a=new URLSearchParams;a.append("start_date",t.toISOString()),a.append("end_date",o.toISOString());let c=a.toString();c&&(r+="?".concat(c));let s=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}return await s.json()}catch(e){throw console.error("Failed to create key:",e),e}},I=async e=>{try{let t=n?"".concat(n,"/onboarding/get_token"):"/onboarding/get_token";t+="?invite_link=".concat(e);let o=await fetch(t,{method:"GET",headers:{"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,t,o,r)=>{let a=n?"".concat(n,"/onboarding/claim_token"):"/onboarding/claim_token";try{let n=await fetch(a,{method:"POST",headers:{[i]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({invitation_link:t,user_id:o,password:r})});if(!n.ok){let e=await n.text();throw l(e),Error("Network response was not ok")}let c=await n.json();return console.log(c),c}catch(e){throw console.error("Failed to delete key:",e),e}},z=async(e,t,o)=>{try{let r=n?"".concat(n,"/key/").concat(t,"/regenerate"):"/key/".concat(t,"/regenerate"),a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(o)});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("Regenerate key Response:",c),c}catch(e){throw console.error("Failed to regenerate key:",e),e}},V=!1,U=null,L=async(e,t,o)=>{try{console.log("modelInfoCall:",e,t,o);let c=n?"".concat(n,"/v2/model/info"):"/v2/model/info";r.ZL.includes(o)||(c+="?user_models_only=true");let s=await fetch(c,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw e+="error shown=".concat(V),V||(e.includes("No model list passed")&&(e="No Models Exist. Click Add Model to get started."),a.ZP.info(e,10),V=!0,U&&clearTimeout(U),U=setTimeout(()=>{V=!1},1e4)),Error("Network response was not ok")}let l=await s.json();return console.log("modelInfoCall:",l),l}catch(e){throw console.error("Failed to create key:",e),e}},M=async(e,t)=>{try{let o=n?"".concat(n,"/v1/model/info"):"/v1/model/info";o+="?litellm_model_id=".concat(t);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");let a=await r.json();return console.log("modelInfoV1Call:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},Z=async e=>{try{let t=n?"".concat(n,"/model_group/info"):"/model_group/info",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("modelHubCall:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},D=async e=>{try{let t=n?"".concat(n,"/get/allowed_ips"):"/get/allowed_ips",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw Error("Network response was not ok: ".concat(e))}let r=await o.json();return console.log("getAllowedIPs:",r),r.data}catch(e){throw console.error("Failed to get allowed IPs:",e),e}},H=async(e,t)=>{try{let o=n?"".concat(n,"/add/allowed_ip"):"/add/allowed_ip",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let a=await r.json();return console.log("addAllowedIP:",a),a}catch(e){throw console.error("Failed to add allowed IP:",e),e}},q=async(e,t)=>{try{let o=n?"".concat(n,"/delete/allowed_ip"):"/delete/allowed_ip",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let a=await r.json();return console.log("deleteAllowedIP:",a),a}catch(e){throw console.error("Failed to delete allowed IP:",e),e}},X=async(e,t,o,r,a,c,s,d)=>{try{let 
t=n?"".concat(n,"/model/metrics"):"/model/metrics";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},K=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/model/streaming_metrics"):"/model/streaming_metrics";t&&(a="".concat(a,"?_selected_model_group=").concat(t,"&startTime=").concat(o,"&endTime=").concat(r));let c=await fetch(a,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},$=async(e,t,o,r,a,c,s,d)=>{try{let t=n?"".concat(n,"/model/metrics/slow_responses"):"/model/metrics/slow_responses";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},Q=async(e,t,o,r,a,c,s,d)=>{try{let t=n?"".concat(n,"/model/metrics/exceptions"):"/model/metrics/exceptions";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},Y=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4&&void 0!==arguments[4]?arguments[4]:null;console.log("in /models calls, globalLitellmHeaderName",i);try{let t=n?"".concat(n,"/models"):"/models",o=new URLSearchParams;!0===r&&o.append("return_wildcard_routes","True"),a&&o.append("team_id",a.toString()),o.toString()&&(t+="?".concat(o.toString()));let c=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},W=async e=>{try{let t=n?"".concat(n,"/global/spend/teams"):"/global/spend/teams";console.log("in teamSpendLogsCall:",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ee=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/tags"):"/global/spend/tags";t&&o&&(a="".concat(a,"?start_date=").concat(t,"&end_date=").concat(o)),r&&(a+="".concat(a,"&tags=").concat(r.join(","))),console.log("in tagsSpendLogsCall:",a);let c=await fetch("".concat(a),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw 
console.error("Failed to create key:",e),e}},et=async e=>{try{let t=n?"".concat(n,"/global/spend/all_tag_names"):"/global/spend/all_tag_names";console.log("in global/spend/all_tag_names call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},eo=async e=>{try{let t=n?"".concat(n,"/global/all_end_users"):"/global/all_end_users";console.log("in global/all_end_users call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},er=async(e,t)=>{try{let o=n?"".concat(n,"/user/filter/ui"):"/user/filter/ui";t.get("user_email")&&(o+="?user_email=".concat(t.get("user_email"))),t.get("user_id")&&(o+="?user_id=".concat(t.get("user_id")));let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to create key:",e),e}},ea=async(e,t,o,r,a,c,s,d,h)=>{try{let w=n?"".concat(n,"/spend/logs/ui"):"/spend/logs/ui",p=new URLSearchParams;t&&p.append("api_key",t),o&&p.append("team_id",o),r&&p.append("request_id",r),a&&p.append("start_date",a),c&&p.append("end_date",c),s&&p.append("page",s.toString()),d&&p.append("page_size",d.toString()),h&&p.append("user_id",h);let u=p.toString();u&&(w+="?".concat(u));let y=await fetch(w,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!y.ok){let e=await y.text();throw l(e),Error("Network response was not ok")}let f=await y.json();return console.log("Spend Logs Response:",f),f}catch(e){throw console.error("Failed to fetch spend logs:",e),e}},en=async e=>{try{let t=n?"".concat(n,"/global/spend/logs"):"/global/spend/logs",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ec=async e=>{try{let t=n?"".concat(n,"/global/spend/keys?limit=5"):"/global/spend/keys?limit=5",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},es=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/end_users"):"/global/spend/end_users",c="";c=t?JSON.stringify({api_key:t,startTime:o,endTime:r}):JSON.stringify({startTime:o,endTime:r});let s={method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:c},d=await fetch(a,s);if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log(h),h}catch(e){throw console.error("Failed to create key:",e),e}},el=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/provider"):"/global/spend/provider";o&&r&&(a+="?start_date=".concat(o,"&end_date=").concat(r)),t&&(a+="&api_key=".concat(t));let c={method:"GET",headers:{[i]:"Bearer 
".concat(e)}},s=await fetch(a,c);if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}let d=await s.json();return console.log(d),d}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ei=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity"):"/global/activity";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ed=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity/cache_hits"):"/global/activity/cache_hits";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},eh=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity/model"):"/global/activity/model";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ew=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/activity/exceptions"):"/global/activity/exceptions";t&&o&&(a+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(a+="&model_group=".concat(r));let c={method:"GET",headers:{[i]:"Bearer ".concat(e)}},s=await fetch(a,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let l=await s.json();return console.log(l),l}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ep=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/activity/exceptions/deployment"):"/global/activity/exceptions/deployment";t&&o&&(a+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(a+="&model_group=".concat(r));let c={method:"GET",headers:{[i]:"Bearer ".concat(e)}},s=await fetch(a,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let l=await s.json();return console.log(l),l}catch(e){throw console.error("Failed to fetch spend data:",e),e}},eu=async e=>{try{let t=n?"".concat(n,"/global/spend/models?limit=5"):"/global/spend/models?limit=5",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ey=async(e,t,o)=>{try{console.log("Sending model connection test request:",JSON.stringify(t));let a=n?"".concat(n,"/health/test_connection"):"/health/test_connection",c=await fetch(a,{method:"POST",headers:{"Content-Type":"application/json",[i]:"Bearer ".concat(e)},body:JSON.stringify({litellm_params:t,mode:o})}),s=c.headers.get("content-type");if(!s||!s.includes("application/json")){let e=await c.text();throw console.error("Received non-JSON response:",e),Error("Received non-JSON response (".concat(c.status,": ").concat(c.statusText,"). 
Check network tab for details."))}let l=await c.json();if(!c.ok||"error"===l.status){if("error"===l.status);else{var r;return{status:"error",message:(null===(r=l.error)||void 0===r?void 0:r.message)||"Connection test failed: ".concat(c.status," ").concat(c.statusText)}}}return l}catch(e){throw console.error("Model connection test error:",e),e}},ef=async(e,t)=>{try{console.log("entering keyInfoV1Call");let o=n?"".concat(n,"/key/info"):"/key/info";o="".concat(o,"?key=").concat(t);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(console.log("response",r),!r.ok){let e=await r.text();l(e),a.ZP.error("Failed to fetch key info - "+e)}let c=await r.json();return console.log("data",c),c}catch(e){throw console.error("Failed to fetch key info:",e),e}},em=async(e,t,o,r,a)=>{try{let c=n?"".concat(n,"/key/list"):"/key/list";console.log("in keyListCall");let s=new URLSearchParams;o&&s.append("team_id",o.toString()),t&&s.append("organization_id",t.toString()),r&&s.append("page",r.toString()),a&&s.append("size",a.toString()),s.append("return_full_object","true"),s.append("include_team_keys","true");let d=s.toString();d&&(c+="?".concat(d));let h=await fetch(c,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!h.ok){let e=await h.text();throw l(e),Error("Network response was not ok")}let w=await h.json();return console.log("/team/list API Response:",w),w}catch(e){throw console.error("Failed to create key:",e),e}},eg=async(e,t)=>{try{let o=n?"".concat(n,"/user/get_users?role=").concat(t):"/user/get_users?role=".concat(t);console.log("in userGetAllUsersCall:",o);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},ek=async e=>{try{let t=n?"".concat(n,"/user/available_roles"):"/user/available_roles",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("response from user/available_role",r),r}catch(e){throw e}},e_=async(e,t)=>{try{if(console.log("Form Values in teamCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/team/new"):"/team/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eT=async(e,t)=>{try{if(console.log("Form Values in credentialCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/credentials"):"/credentials",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API 
Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eE=async e=>{try{let t=n?"".concat(n,"/credentials"):"/credentials";console.log("in credentialListCall");let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("/credentials API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},ej=async(e,t,o)=>{try{let r=n?"".concat(n,"/credentials"):"/credentials";t?r+="/by_name/".concat(t):o&&(r+="/by_model/".concat(o)),console.log("in credentialListCall");let a=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("/credentials API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eC=async(e,t)=>{try{let o=n?"".concat(n,"/credentials/").concat(t):"/credentials/".concat(t);console.log("in credentialDeleteCall:",t);let r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete key:",e),e}},eN=async(e,t,o)=>{try{if(console.log("Form Values in credentialUpdateCall:",o),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let r=n?"".concat(n,"/credentials/").concat(t):"/credentials/".concat(t),a=await fetch(r,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eb=async(e,t)=>{try{if(console.log("Form Values in keyUpdateCall:",t),t.model_tpm_limit){console.log("formValues.model_tpm_limit:",t.model_tpm_limit);try{t.model_tpm_limit=JSON.parse(t.model_tpm_limit)}catch(e){throw Error("Failed to parse model_tpm_limit: "+e)}}if(t.model_rpm_limit){console.log("formValues.model_rpm_limit:",t.model_rpm_limit);try{t.model_rpm_limit=JSON.parse(t.model_rpm_limit)}catch(e){throw Error("Failed to parse model_rpm_limit: "+e)}}let o=n?"".concat(n,"/key/update"):"/key/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update key Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eF=async(e,t)=>{try{console.log("Form Values in teamUpateCall:",t);let o=n?"".concat(n,"/team/update"):"/team/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update Team Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eS=async(e,t)=>{try{console.log("Form Values in 
modelUpateCall:",t);let o=n?"".concat(n,"/model/update"):"/model/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error update from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update model Response:",a),a}catch(e){throw console.error("Failed to update model:",e),e}},ex=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_add"):"/team/member_add",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,member:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eB=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_update"):"/team/member_update",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,role:o.role,user_id:o.user_id})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eO=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_delete"):"/team/member_delete",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,...void 0!==o.user_email&&{user_email:o.user_email},...void 0!==o.user_id&&{user_id:o.user_id}})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eP=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/organization/member_add"):"/organization/member_add",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,member:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create organization member:",e),e}},ev=async(e,t,o)=>{try{console.log("Form Values in organizationMemberDeleteCall:",o);let r=n?"".concat(n,"/organization/member_delete"):"/organization/member_delete",a=await fetch(r,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,user_id:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to delete organization member:",e),e}},eG=async(e,t,o)=>{try{console.log("Form Values in organizationMemberUpdateCall:",o);let r=n?"".concat(n,"/organization/member_update"):"/organization/member_update",a=await fetch(r,{method:"PATCH",headers:{[i]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to update organization member:",e),e}},eA=async(e,t,o)=>{try{console.log("Form Values in userUpdateUserCall:",t);let r=n?"".concat(n,"/user/update"):"/user/update",a={...t};null!==o&&(a.user_role=o),a=JSON.stringify(a);let c=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:a});if(!c.ok){let e=await c.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await c.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},eJ=async(e,t)=>{try{let o=n?"".concat(n,"/health/services?service=").concat(t):"/health/services?service=".concat(t);console.log("Checking Slack Budget Alerts service health");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error(e)}let c=await r.json();return a.ZP.success("Test request to ".concat(t," made - check logs/alerts on ").concat(t," to verify")),c}catch(e){throw console.error("Failed to perform health check:",e),e}},eI=async e=>{try{let t=n?"".concat(n,"/budget/list"):"/budget/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eR=async(e,t,o)=>{try{let t=n?"".concat(n,"/get/config/callbacks"):"/get/config/callbacks",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},ez=async e=>{try{let t=n?"".concat(n,"/config/list?config_type=general_settings"):"/config/list?config_type=general_settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eV=async e=>{try{let t=n?"".concat(n,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eU=async(e,t)=>{try{let o=n?"".concat(n,"/config/field/info?field_name=").concat(t):"/config/field/info?field_name=".concat(t),r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eL=async(e,t)=>{try{let o=n?"".concat(n,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network 
response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eM=async(e,t,o)=>{try{let r=n?"".concat(n,"/config/field/update"):"/config/field/update",c=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,field_value:o,config_type:"general_settings"})});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}let s=await c.json();return a.ZP.success("Successfully updated value!"),s}catch(e){throw console.error("Failed to set callbacks:",e),e}},eZ=async(e,t)=>{try{let o=n?"".concat(n,"/config/field/delete"):"/config/field/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,config_type:"general_settings"})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let c=await r.json();return a.ZP.success("Field reset on proxy"),c}catch(e){throw console.error("Failed to get callbacks:",e),e}},eD=async(e,t)=>{try{let o=n?"".concat(n,"/config/pass_through_endpoint?endpoint_id=").concat(t):"/config/pass_through_endpoint".concat(t),r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eH=async(e,t)=>{try{let o=n?"".concat(n,"/config/update"):"/config/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eq=async e=>{try{let t=n?"".concat(n,"/health"):"/health",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to call /health:",e),e}},eX=async e=>{try{let t=n?"".concat(n,"/cache/ping"):"/cache/ping",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error(e)}return await o.json()}catch(e){throw console.error("Failed to call /cache/ping:",e),e}},eK=async e=>{try{let t=n?"".concat(n,"/sso/get/ui_settings"):"/sso/get/ui_settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},e$=async e=>{try{let t=n?"".concat(n,"/guardrails/list"):"/guardrails/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Guardrails list response:",r),r}catch(e){throw console.error("Failed to fetch guardrails list:",e),e}},eQ=async(e,t,o)=>{try{let r=n?"".concat(n,"/spend/logs/ui/").concat(t,"?start_date=").concat(encodeURIComponent(o)):"/spend/logs/ui/".concat(t,"?start_date=").concat(encodeURIComponent(o));console.log("Fetching log details from:",r);let a=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await 
a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("Fetched log details:",c),c}catch(e){throw console.error("Failed to fetch log details:",e),e}},eY=async e=>{try{let t=n?"".concat(n,"/get/internal_user_settings"):"/get/internal_user_settings";console.log("Fetching SSO settings from:",t);let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Fetched SSO settings:",r),r}catch(e){throw console.error("Failed to fetch SSO settings:",e),e}},eW=async(e,t)=>{try{let o=n?"".concat(n,"/update/internal_user_settings"):"/update/internal_user_settings";console.log("Updating internal user settings:",t);let r=await fetch(o,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(t)});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let c=await r.json();return console.log("Updated internal user settings:",c),a.ZP.success("Internal user settings updated successfully"),c}catch(e){throw console.error("Failed to update internal user settings:",e),e}},e0=async e=>{try{let t=n?"".concat(n,"/mcp/tools/list"):"/mcp/tools/list";console.log("Fetching MCP tools from:",t);let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Fetched MCP tools:",r),r}catch(e){throw console.error("Failed to fetch MCP tools:",e),e}},e1=async(e,t,o)=>{try{let r=n?"".concat(n,"/mcp/tools/call"):"/mcp/tools/call";console.log("Calling MCP tool:",t,"with arguments:",o);let a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({name:t,arguments:o})});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("MCP tool call response:",c),c}catch(e){throw console.error("Failed to call MCP tool:",e),e}}},20347:function(e,t,o){o.d(t,{LQ:function(){return n},ZL:function(){return r},lo:function(){return a}});let r=["Admin","Admin Viewer","proxy_admin","proxy_admin_viewer","org_admin"],a=["Internal User","Internal Viewer"],n=["Internal User","Admin"]}}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/250-dfc03a6fb4f0d254.js b/litellm/proxy/_experimental/out/_next/static/chunks/250-dfc03a6fb4f0d254.js deleted file mode 100644 index ffb466ac4d..0000000000 --- a/litellm/proxy/_experimental/out/_next/static/chunks/250-dfc03a6fb4f0d254.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[250],{19250:function(e,t,o){o.d(t,{$I:function(){return q},AZ:function(){return L},Au:function(){return eu},BL:function(){return eR},Br:function(){return F},E9:function(){return eU},EG:function(){return eD},EY:function(){return eq},Eb:function(){return C},FC:function(){return en},Gh:function(){return eF},H1:function(){return v},H2:function(){return n},Hx:function(){return ey},I1:function(){return j},It:function(){return x},J$:function(){return ee},K8:function(){return d},K_:function(){return eH},LY:function(){return eG},Lp:function(){return eO},N3:function(){return eE},N8:function(){return Q},NL:function(){return eY},NV:function(){return f},Nc:function(){return eb},O3:function(){return 
eI},OD:function(){return em},OU:function(){return el},Of:function(){return b},Og:function(){return y},Ov:function(){return E},PT:function(){return D},Qg:function(){return ej},RQ:function(){return _},Rg:function(){return K},Sb:function(){return ev},So:function(){return Y},Tj:function(){return eX},VA:function(){return G},Vt:function(){return eL},W_:function(){return I},X:function(){return et},XO:function(){return k},Xd:function(){return eg},Xm:function(){return S},YU:function(){return ez},Zr:function(){return m},a6:function(){return B},ao:function(){return eZ},b1:function(){return es},cq:function(){return A},cu:function(){return ex},eH:function(){return H},eZ:function(){return eN},fP:function(){return $},g:function(){return eK},gX:function(){return eC},h3:function(){return ea},hT:function(){return e_},hy:function(){return u},ix:function(){return M},j2:function(){return eo},jA:function(){return eM},jE:function(){return eJ},kK:function(){return p},kn:function(){return Z},lP:function(){return h},lU:function(){return e0},lg:function(){return ek},mR:function(){return W},m_:function(){return R},mp:function(){return eV},n$:function(){return ew},nd:function(){return eW},o6:function(){return X},oC:function(){return eT},pf:function(){return eA},qI:function(){return g},qk:function(){return eQ},qm:function(){return w},r6:function(){return O},rs:function(){return N},s0:function(){return z},sN:function(){return eB},t$:function(){return P},t0:function(){return ef},t3:function(){return e$},tB:function(){return e1},tN:function(){return ec},u5:function(){return er},um:function(){return eS},v9:function(){return ep},vh:function(){return eP},wX:function(){return T},wd:function(){return ei},xA:function(){return eh},xX:function(){return J},zg:function(){return ed}});var r=o(20347),a=o(41021);let n=null;console.log=function(){};let c=0,s=e=>new Promise(t=>setTimeout(t,e)),l=async e=>{let t=Date.now();t-c>6e4?(e.includes("Authentication Error - Expired Key")&&(a.ZP.info("UI Session Expired. 
Logging out."),c=t,await s(3e3),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;",window.location.href="/"),c=t):console.log("Error suppressed to prevent spam:",e)},i="Authorization";function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"Authorization";console.log("setGlobalLitellmHeaderName: ".concat(e)),i=e}let h=async()=>{let e=n?"".concat(n,"/openapi.json"):"/openapi.json",t=await fetch(e);return await t.json()},w=async e=>{try{let t=n?"".concat(n,"/get/litellm_model_cost_map"):"/get/litellm_model_cost_map",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}}),r=await o.json();return console.log("received litellm model cost data: ".concat(r)),r}catch(e){throw console.error("Failed to get model cost map:",e),e}},p=async(e,t)=>{try{let o=n?"".concat(n,"/model/new"):"/model/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text()||"Network response was not ok";throw a.ZP.error(e),Error(e)}let c=await r.json();return console.log("API Response:",c),a.ZP.destroy(),a.ZP.success("Model ".concat(t.model_name," created successfully"),2),c}catch(e){throw console.error("Failed to create key:",e),e}},u=async e=>{try{let t=n?"".concat(n,"/model/settings"):"/model/settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){console.error("Failed to get model settings:",e)}},y=async(e,t)=>{console.log("model_id in model delete call: ".concat(t));try{let o=n?"".concat(n,"/model/delete"):"/model/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,t)=>{if(console.log("budget_id in budget delete call: ".concat(t)),null!=e)try{let o=n?"".concat(n,"/budget/delete"):"/budget/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},m=async(e,t)=>{try{console.log("Form Values in budgetCreateCall:",t),console.log("Form Values after check:",t);let o=n?"".concat(n,"/budget/new"):"/budget/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,t)=>{try{console.log("Form Values in budgetUpdateCall:",t),console.log("Form Values after check:",t);let o=n?"".concat(n,"/budget/update"):"/budget/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw 
l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,t)=>{try{let o=n?"".concat(n,"/invitation/new"):"/invitation/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},_=async e=>{try{let t=n?"".concat(n,"/alerting/settings"):"/alerting/settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},T=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=n?"".concat(n,"/key/generate"):"/key/generate",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=n?"".concat(n,"/user/new"):"/user/new",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,t)=>{try{let o=n?"".concat(n,"/key/delete"):"/key/delete";console.log("in keyDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to create key:",e),e}},C=async(e,t)=>{try{let o=n?"".concat(n,"/user/delete"):"/user/delete";console.log("in userDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_ids:t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete user(s):",e),e}},N=async(e,t)=>{try{let 
o=n?"".concat(n,"/team/delete"):"/team/delete";console.log("in teamDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete key:",e),e}},b=async function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null,r=arguments.length>3&&void 0!==arguments[3]?arguments[3]:null;try{let a=n?"".concat(n,"/user/list"):"/user/list";console.log("in userListCall");let c=new URLSearchParams;if(t&&t.length>0){let e=t.join(",");c.append("user_ids",e)}o&&c.append("page",o.toString()),r&&c.append("page_size",r.toString());let s=c.toString();s&&(a+="?".concat(s));let d=await fetch(a,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log("/user/list API Response:",h),h}catch(e){throw console.error("Failed to create key:",e),e}},F=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,c=arguments.length>5?arguments[5]:void 0;try{let s;if(r){s=n?"".concat(n,"/user/list"):"/user/list";let e=new URLSearchParams;null!=a&&e.append("page",a.toString()),null!=c&&e.append("page_size",c.toString()),s+="?".concat(e.toString())}else s=n?"".concat(n,"/user/info"):"/user/info","Admin"===o||"Admin Viewer"===o||t&&(s+="?user_id=".concat(t));console.log("Requesting user data from:",s);let d=await fetch(s,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log("API Response:",h),h}catch(e){throw console.error("Failed to fetch user data:",e),e}},S=async(e,t)=>{try{let o=n?"".concat(n,"/team/info"):"/team/info";t&&(o="".concat(o,"?team_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},x=async function(e,t){let o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;try{let r=n?"".concat(n,"/team/list"):"/team/list";console.log("in teamInfoCall");let a=new URLSearchParams;o&&a.append("user_id",o.toString()),t&&a.append("organization_id",t.toString());let c=a.toString();c&&(r+="?".concat(c));let s=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}let d=await s.json();return console.log("/team/list API Response:",d),d}catch(e){throw console.error("Failed to create key:",e),e}},B=async e=>{try{let t=n?"".concat(n,"/team/available"):"/team/available";console.log("in availableTeamListCall");let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("/team/available_teams API Response:",r),r}catch(e){throw e}},O=async e=>{try{let 
t=n?"".concat(n,"/organization/list"):"/organization/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,t)=>{try{let o=n?"".concat(n,"/organization/info"):"/organization/info";t&&(o="".concat(o,"?organization_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,t)=>{try{if(console.log("Form Values in organizationCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw console.error("Failed to parse metadata:",e),Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/organization/new"):"/organization/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},G=async(e,t)=>{try{console.log("Form Values in organizationUpdateCall:",t);let o=n?"".concat(n,"/organization/update"):"/organization/update",r=await fetch(o,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update Team Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},A=async(e,t)=>{try{let o=n?"".concat(n,"/organization/delete"):"/organization/delete",r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_ids:[t]})});if(!r.ok){let e=await r.text();throw l(e),Error("Error deleting organization: ".concat(e))}return await r.json()}catch(e){throw console.error("Failed to delete organization:",e),e}},J=async(e,t,o)=>{try{let r=n?"".concat(n,"/user/daily/activity"):"/user/daily/activity",a=new URLSearchParams;a.append("start_date",t.toISOString()),a.append("end_date",o.toISOString());let c=a.toString();c&&(r+="?".concat(c));let s=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}return await s.json()}catch(e){throw console.error("Failed to create key:",e),e}},I=async e=>{try{let t=n?"".concat(n,"/onboarding/get_token"):"/onboarding/get_token";t+="?invite_link=".concat(e);let o=await fetch(t,{method:"GET",headers:{"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,t,o,r)=>{let a=n?"".concat(n,"/onboarding/claim_token"):"/onboarding/claim_token";try{let n=await fetch(a,{method:"POST",headers:{[i]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({invitation_link:t,user_id:o,password:r})});if(!n.ok){let e=await n.text();throw l(e),Error("Network response was not ok")}let c=await n.json();return console.log(c),c}catch(e){throw console.error("Failed to delete key:",e),e}},z=async(e,t,o)=>{try{let r=n?"".concat(n,"/key/").concat(t,"/regenerate"):"/key/".concat(t,"/regenerate"),a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(o)});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("Regenerate key Response:",c),c}catch(e){throw console.error("Failed to regenerate key:",e),e}},V=!1,U=null,L=async(e,t,o)=>{try{console.log("modelInfoCall:",e,t,o);let c=n?"".concat(n,"/v2/model/info"):"/v2/model/info";r.ZL.includes(o)||(c+="?user_models_only=true");let s=await fetch(c,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw e+="error shown=".concat(V),V||(e.includes("No model list passed")&&(e="No Models Exist. Click Add Model to get started."),a.ZP.info(e,10),V=!0,U&&clearTimeout(U),U=setTimeout(()=>{V=!1},1e4)),Error("Network response was not ok")}let l=await s.json();return console.log("modelInfoCall:",l),l}catch(e){throw console.error("Failed to create key:",e),e}},M=async(e,t)=>{try{let o=n?"".concat(n,"/v1/model/info"):"/v1/model/info";o+="?litellm_model_id=".concat(t);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");let a=await r.json();return console.log("modelInfoV1Call:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},Z=async e=>{try{let t=n?"".concat(n,"/model_group/info"):"/model_group/info",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("modelHubCall:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},D=async e=>{try{let t=n?"".concat(n,"/get/allowed_ips"):"/get/allowed_ips",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw Error("Network response was not ok: ".concat(e))}let r=await o.json();return console.log("getAllowedIPs:",r),r.data}catch(e){throw console.error("Failed to get allowed IPs:",e),e}},H=async(e,t)=>{try{let o=n?"".concat(n,"/add/allowed_ip"):"/add/allowed_ip",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let a=await r.json();return console.log("addAllowedIP:",a),a}catch(e){throw console.error("Failed to add allowed IP:",e),e}},q=async(e,t)=>{try{let o=n?"".concat(n,"/delete/allowed_ip"):"/delete/allowed_ip",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let a=await r.json();return console.log("deleteAllowedIP:",a),a}catch(e){throw console.error("Failed to delete allowed IP:",e),e}},X=async(e,t,o,r,a,c,s,d)=>{try{let 
t=n?"".concat(n,"/model/metrics"):"/model/metrics";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},K=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/model/streaming_metrics"):"/model/streaming_metrics";t&&(a="".concat(a,"?_selected_model_group=").concat(t,"&startTime=").concat(o,"&endTime=").concat(r));let c=await fetch(a,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},$=async(e,t,o,r,a,c,s,d)=>{try{let t=n?"".concat(n,"/model/metrics/slow_responses"):"/model/metrics/slow_responses";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},Q=async(e,t,o,r,a,c,s,d)=>{try{let t=n?"".concat(n,"/model/metrics/exceptions"):"/model/metrics/exceptions";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(a,"&endTime=").concat(c,"&api_key=").concat(s,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},Y=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4&&void 0!==arguments[4]?arguments[4]:null;console.log("in /models calls, globalLitellmHeaderName",i);try{let t=n?"".concat(n,"/models"):"/models",o=new URLSearchParams;!0===r&&o.append("return_wildcard_routes","True"),a&&o.append("team_id",a.toString()),o.toString()&&(t+="?".concat(o.toString()));let c=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},W=async e=>{try{let t=n?"".concat(n,"/global/spend/teams"):"/global/spend/teams";console.log("in teamSpendLogsCall:",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ee=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/tags"):"/global/spend/tags";t&&o&&(a="".concat(a,"?start_date=").concat(t,"&end_date=").concat(o)),r&&(a+="".concat(a,"&tags=").concat(r.join(","))),console.log("in tagsSpendLogsCall:",a);let c=await fetch("".concat(a),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw 
console.error("Failed to create key:",e),e}},et=async e=>{try{let t=n?"".concat(n,"/global/spend/all_tag_names"):"/global/spend/all_tag_names";console.log("in global/spend/all_tag_names call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},eo=async e=>{try{let t=n?"".concat(n,"/global/all_end_users"):"/global/all_end_users";console.log("in global/all_end_users call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},er=async(e,t)=>{try{let o=n?"".concat(n,"/user/filter/ui"):"/user/filter/ui";t.get("user_email")&&(o+="?user_email=".concat(t.get("user_email"))),t.get("user_id")&&(o+="?user_id=".concat(t.get("user_id")));let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to create key:",e),e}},ea=async(e,t,o,r,a,c,s,d,h)=>{try{let w=n?"".concat(n,"/spend/logs/ui"):"/spend/logs/ui",p=new URLSearchParams;t&&p.append("api_key",t),o&&p.append("team_id",o),r&&p.append("request_id",r),a&&p.append("start_date",a),c&&p.append("end_date",c),s&&p.append("page",s.toString()),d&&p.append("page_size",d.toString()),h&&p.append("user_id",h);let u=p.toString();u&&(w+="?".concat(u));let y=await fetch(w,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!y.ok){let e=await y.text();throw l(e),Error("Network response was not ok")}let f=await y.json();return console.log("Spend Logs Response:",f),f}catch(e){throw console.error("Failed to fetch spend logs:",e),e}},en=async e=>{try{let t=n?"".concat(n,"/global/spend/logs"):"/global/spend/logs",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ec=async e=>{try{let t=n?"".concat(n,"/global/spend/keys?limit=5"):"/global/spend/keys?limit=5",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},es=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/end_users"):"/global/spend/end_users",c="";c=t?JSON.stringify({api_key:t,startTime:o,endTime:r}):JSON.stringify({startTime:o,endTime:r});let s={method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:c},d=await fetch(a,s);if(!d.ok){let e=await d.text();throw l(e),Error("Network response was not ok")}let h=await d.json();return console.log(h),h}catch(e){throw console.error("Failed to create key:",e),e}},el=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/spend/provider"):"/global/spend/provider";o&&r&&(a+="?start_date=".concat(o,"&end_date=").concat(r)),t&&(a+="&api_key=".concat(t));let c={method:"GET",headers:{[i]:"Bearer 
".concat(e)}},s=await fetch(a,c);if(!s.ok){let e=await s.text();throw l(e),Error("Network response was not ok")}let d=await s.json();return console.log(d),d}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ei=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity"):"/global/activity";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ed=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity/cache_hits"):"/global/activity/cache_hits";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},eh=async(e,t,o)=>{try{let r=n?"".concat(n,"/global/activity/model"):"/global/activity/model";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let a={method:"GET",headers:{[i]:"Bearer ".concat(e)}},c=await fetch(r,a);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ew=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/activity/exceptions"):"/global/activity/exceptions";t&&o&&(a+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(a+="&model_group=".concat(r));let c={method:"GET",headers:{[i]:"Bearer ".concat(e)}},s=await fetch(a,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let l=await s.json();return console.log(l),l}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ep=async(e,t,o,r)=>{try{let a=n?"".concat(n,"/global/activity/exceptions/deployment"):"/global/activity/exceptions/deployment";t&&o&&(a+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(a+="&model_group=".concat(r));let c={method:"GET",headers:{[i]:"Bearer ".concat(e)}},s=await fetch(a,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let l=await s.json();return console.log(l),l}catch(e){throw console.error("Failed to fetch spend data:",e),e}},eu=async e=>{try{let t=n?"".concat(n,"/global/spend/models?limit=5"):"/global/spend/models?limit=5",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ey=async(e,t,o)=>{try{console.log("Sending model connection test request:",JSON.stringify(t));let a=n?"".concat(n,"/health/test_connection"):"/health/test_connection",c=await fetch(a,{method:"POST",headers:{"Content-Type":"application/json",[i]:"Bearer ".concat(e)},body:JSON.stringify({litellm_params:t,mode:o})}),s=c.headers.get("content-type");if(!s||!s.includes("application/json")){let e=await c.text();throw console.error("Received non-JSON response:",e),Error("Received non-JSON response (".concat(c.status,": ").concat(c.statusText,"). 
Check network tab for details."))}let l=await c.json();if(!c.ok||"error"===l.status){if("error"===l.status);else{var r;return{status:"error",message:(null===(r=l.error)||void 0===r?void 0:r.message)||"Connection test failed: ".concat(c.status," ").concat(c.statusText)}}}return l}catch(e){throw console.error("Model connection test error:",e),e}},ef=async(e,t)=>{try{console.log("entering keyInfoV1Call");let o=n?"".concat(n,"/key/info"):"/key/info";o="".concat(o,"?key=").concat(t);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(console.log("response",r),!r.ok){let e=await r.text();l(e),a.ZP.error("Failed to fetch key info - "+e)}let c=await r.json();return console.log("data",c),c}catch(e){throw console.error("Failed to fetch key info:",e),e}},em=async(e,t,o,r,a)=>{try{let c=n?"".concat(n,"/key/list"):"/key/list";console.log("in keyListCall");let s=new URLSearchParams;o&&s.append("team_id",o.toString()),t&&s.append("organization_id",t.toString()),r&&s.append("page",r.toString()),a&&s.append("size",a.toString()),s.append("return_full_object","true"),s.append("include_team_keys","true");let d=s.toString();d&&(c+="?".concat(d));let h=await fetch(c,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!h.ok){let e=await h.text();throw l(e),Error("Network response was not ok")}let w=await h.json();return console.log("/team/list API Response:",w),w}catch(e){throw console.error("Failed to create key:",e),e}},eg=async(e,t)=>{try{let o=n?"".concat(n,"/user/get_users?role=").concat(t):"/user/get_users?role=".concat(t);console.log("in userGetAllUsersCall:",o);let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},ek=async e=>{try{let t=n?"".concat(n,"/user/available_roles"):"/user/available_roles",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("response from user/available_role",r),r}catch(e){throw e}},e_=async(e,t)=>{try{if(console.log("Form Values in teamCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/team/new"):"/team/new",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eT=async(e,t)=>{try{if(console.log("Form Values in credentialCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=n?"".concat(n,"/credentials"):"/credentials",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("API 
Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eE=async e=>{try{let t=n?"".concat(n,"/credentials"):"/credentials";console.log("in credentialListCall");let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("/credentials API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},ej=async(e,t,o)=>{try{let r=n?"".concat(n,"/credentials"):"/credentials";t?r+="/by_name/".concat(t):o&&(r+="/by_model/".concat(o)),console.log("in credentialListCall");let a=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("/credentials API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eC=async(e,t)=>{try{let o=n?"".concat(n,"/credentials/").concat(t):"/credentials/".concat(t);console.log("in credentialDeleteCall:",t);let r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let a=await r.json();return console.log(a),a}catch(e){throw console.error("Failed to delete key:",e),e}},eN=async(e,t,o)=>{try{if(console.log("Form Values in credentialUpdateCall:",o),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let r=n?"".concat(n,"/credentials/").concat(t):"/credentials/".concat(t),a=await fetch(r,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eb=async(e,t)=>{try{if(console.log("Form Values in keyUpdateCall:",t),t.model_tpm_limit){console.log("formValues.model_tpm_limit:",t.model_tpm_limit);try{t.model_tpm_limit=JSON.parse(t.model_tpm_limit)}catch(e){throw Error("Failed to parse model_tpm_limit: "+e)}}if(t.model_rpm_limit){console.log("formValues.model_rpm_limit:",t.model_rpm_limit);try{t.model_rpm_limit=JSON.parse(t.model_rpm_limit)}catch(e){throw Error("Failed to parse model_rpm_limit: "+e)}}let o=n?"".concat(n,"/key/update"):"/key/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update key Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eF=async(e,t)=>{try{console.log("Form Values in teamUpateCall:",t);let o=n?"".concat(n,"/team/update"):"/team/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update Team Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},eS=async(e,t)=>{try{console.log("Form Values in 
modelUpateCall:",t);let o=n?"".concat(n,"/model/update"):"/model/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),console.error("Error update from the server:",e),Error("Network response was not ok")}let a=await r.json();return console.log("Update model Response:",a),a}catch(e){throw console.error("Failed to update model:",e),e}},ex=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_add"):"/team/member_add",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,member:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eB=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_update"):"/team/member_update",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,role:o.role,user_id:o.user_id})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eO=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/team/member_delete"):"/team/member_delete",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,...void 0!==o.user_email&&{user_email:o.user_email},...void 0!==o.user_id&&{user_id:o.user_id}})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eP=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=n?"".concat(n,"/organization/member_add"):"/organization/member_add",a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,member:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error(e)}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create organization member:",e),e}},ev=async(e,t,o)=>{try{console.log("Form Values in organizationMemberDeleteCall:",o);let r=n?"".concat(n,"/organization/member_delete"):"/organization/member_delete",a=await fetch(r,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,user_id:o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to delete organization member:",e),e}},eG=async(e,t,o)=>{try{console.log("Form Values in organizationMemberUpdateCall:",o);let r=n?"".concat(n,"/organization/member_update"):"/organization/member_update",a=await fetch(r,{method:"PATCH",headers:{[i]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,...o})});if(!a.ok){let e=await a.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await a.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to update organization member:",e),e}},eA=async(e,t,o)=>{try{console.log("Form Values in userUpdateUserCall:",t);let r=n?"".concat(n,"/user/update"):"/user/update",a={...t};null!==o&&(a.user_role=o),a=JSON.stringify(a);let c=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:a});if(!c.ok){let e=await c.text();throw l(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await c.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},eJ=async(e,t)=>{try{let o=n?"".concat(n,"/health/services?service=").concat(t):"/health/services?service=".concat(t);console.log("Checking Slack Budget Alerts service health");let r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error(e)}let c=await r.json();return a.ZP.success("Test request to ".concat(t," made - check logs/alerts on ").concat(t," to verify")),c}catch(e){throw console.error("Failed to perform health check:",e),e}},eI=async e=>{try{let t=n?"".concat(n,"/budget/list"):"/budget/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eR=async(e,t,o)=>{try{let t=n?"".concat(n,"/get/config/callbacks"):"/get/config/callbacks",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},ez=async e=>{try{let t=n?"".concat(n,"/config/list?config_type=general_settings"):"/config/list?config_type=general_settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eV=async e=>{try{let t=n?"".concat(n,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eU=async(e,t)=>{try{let o=n?"".concat(n,"/config/field/info?field_name=").concat(t):"/config/field/info?field_name=".concat(t),r=await fetch(o,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eL=async(e,t)=>{try{let o=n?"".concat(n,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network 
response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eM=async(e,t,o)=>{try{let r=n?"".concat(n,"/config/field/update"):"/config/field/update",c=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,field_value:o,config_type:"general_settings"})});if(!c.ok){let e=await c.text();throw l(e),Error("Network response was not ok")}let s=await c.json();return a.ZP.success("Successfully updated value!"),s}catch(e){throw console.error("Failed to set callbacks:",e),e}},eZ=async(e,t)=>{try{let o=n?"".concat(n,"/config/field/delete"):"/config/field/delete",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,config_type:"general_settings"})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let c=await r.json();return a.ZP.success("Field reset on proxy"),c}catch(e){throw console.error("Failed to get callbacks:",e),e}},eD=async(e,t)=>{try{let o=n?"".concat(n,"/config/pass_through_endpoint?endpoint_id=").concat(t):"/config/pass_through_endpoint".concat(t),r=await fetch(o,{method:"DELETE",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eH=async(e,t)=>{try{let o=n?"".concat(n,"/config/update"):"/config/update",r=await fetch(o,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eq=async e=>{try{let t=n?"".concat(n,"/health"):"/health",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to call /health:",e),e}},eX=async e=>{try{let t=n?"".concat(n,"/cache/ping"):"/cache/ping",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error(e)}return await o.json()}catch(e){throw console.error("Failed to call /cache/ping:",e),e}},eK=async e=>{try{let t=n?"".concat(n,"/sso/get/ui_settings"):"/sso/get/ui_settings",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},e$=async e=>{try{let t=n?"".concat(n,"/guardrails/list"):"/guardrails/list",o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Guardrails list response:",r),r}catch(e){throw console.error("Failed to fetch guardrails list:",e),e}},eQ=async(e,t,o)=>{try{let r=n?"".concat(n,"/spend/logs/ui/").concat(t,"?start_date=").concat(encodeURIComponent(o)):"/spend/logs/ui/".concat(t,"?start_date=").concat(encodeURIComponent(o));console.log("Fetching log details from:",r);let a=await fetch(r,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await 
a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("Fetched log details:",c),c}catch(e){throw console.error("Failed to fetch log details:",e),e}},eY=async e=>{try{let t=n?"".concat(n,"/get/internal_user_settings"):"/get/internal_user_settings";console.log("Fetching SSO settings from:",t);let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Fetched SSO settings:",r),r}catch(e){throw console.error("Failed to fetch SSO settings:",e),e}},eW=async(e,t)=>{try{let o=n?"".concat(n,"/update/internal_user_settings"):"/update/internal_user_settings";console.log("Updating internal user settings:",t);let r=await fetch(o,{method:"PATCH",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(t)});if(!r.ok){let e=await r.text();throw l(e),Error("Network response was not ok")}let c=await r.json();return console.log("Updated internal user settings:",c),a.ZP.success("Internal user settings updated successfully"),c}catch(e){throw console.error("Failed to update internal user settings:",e),e}},e0=async e=>{try{let t=n?"".concat(n,"/mcp/tools/list"):"/mcp/tools/list";console.log("Fetching MCP tools from:",t);let o=await fetch(t,{method:"GET",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw l(e),Error("Network response was not ok")}let r=await o.json();return console.log("Fetched MCP tools:",r),r}catch(e){throw console.error("Failed to fetch MCP tools:",e),e}},e1=async(e,t,o)=>{try{let r=n?"".concat(n,"/mcp/tools/call"):"/mcp/tools/call";console.log("Calling MCP tool:",t,"with arguments:",o);let a=await fetch(r,{method:"POST",headers:{[i]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({name:t,arguments:o})});if(!a.ok){let e=await a.text();throw l(e),Error("Network response was not ok")}let c=await a.json();return console.log("MCP tool call response:",c),c}catch(e){throw console.error("Failed to call MCP tool:",e),e}}},20347:function(e,t,o){o.d(t,{LQ:function(){return n},ZL:function(){return r},lo:function(){return a}});let r=["Admin","Admin Viewer","proxy_admin","proxy_admin_viewer","org_admin"],a=["Internal User","Internal Viewer"],n=["Internal User","Admin"]}}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/261-57d48f76eec1e568.js b/litellm/proxy/_experimental/out/_next/static/chunks/261-d4b99bc9f53d4ef3.js similarity index 99% rename from litellm/proxy/_experimental/out/_next/static/chunks/261-57d48f76eec1e568.js rename to litellm/proxy/_experimental/out/_next/static/chunks/261-d4b99bc9f53d4ef3.js index 44e5f1be73..f21f16362b 100644 --- a/litellm/proxy/_experimental/out/_next/static/chunks/261-57d48f76eec1e568.js +++ b/litellm/proxy/_experimental/out/_next/static/chunks/261-d4b99bc9f53d4ef3.js @@ -1 +1 @@ -(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[261],{23639:function(e,t,n){"use strict";n.d(t,{Z:function(){return s}});var a=n(1119),r=n(2265),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M832 64H296c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h496v688c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8V96c0-17.7-14.3-32-32-32zM704 192H192c-17.7 0-32 14.3-32 32v530.7c0 8.5 3.4 16.6 9.4 22.6l173.3 173.3c2.2 2.2 4.7 4 7.4 5.5v1.9h4.2c3.5 1.3 7.2 2 11 2H704c17.7 0 32-14.3 
32-32V224c0-17.7-14.3-32-32-32zM350 856.2L263.9 770H350v86.2zM664 888H414V746c0-22.1-17.9-40-40-40H232V264h432v624z"}}]},name:"copy",theme:"outlined"},o=n(55015),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},77565:function(e,t,n){"use strict";n.d(t,{Z:function(){return s}});var a=n(1119),r=n(2265),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M765.7 486.8L314.9 134.7A7.97 7.97 0 00302 141v77.3c0 4.9 2.3 9.6 6.1 12.6l360 281.1-360 281.1c-3.9 3-6.1 7.7-6.1 12.6V883c0 6.7 7.7 10.4 12.9 6.3l450.8-352.1a31.96 31.96 0 000-50.4z"}}]},name:"right",theme:"outlined"},o=n(55015),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},12485:function(e,t,n){"use strict";n.d(t,{Z:function(){return p}});var a=n(5853),r=n(31492),i=n(26898),o=n(65954),s=n(1153),l=n(2265),c=n(35242),u=n(42698);n(64016),n(8710),n(33232);let d=(0,s.fn)("Tab"),p=l.forwardRef((e,t)=>{let{icon:n,className:p,children:g}=e,m=(0,a._T)(e,["icon","className","children"]),b=(0,l.useContext)(c.O),f=(0,l.useContext)(u.Z);return l.createElement(r.O,Object.assign({ref:t,className:(0,o.q)(d("root"),"flex whitespace-nowrap truncate max-w-xs outline-none focus:ring-0 text-tremor-default transition duration-100",f?(0,s.bM)(f,i.K.text).selectTextColor:"solid"===b?"ui-selected:text-tremor-content-emphasis dark:ui-selected:text-dark-tremor-content-emphasis":"ui-selected:text-tremor-brand dark:ui-selected:text-dark-tremor-brand",function(e,t){switch(e){case"line":return(0,o.q)("ui-selected:border-b-2 hover:border-b-2 border-transparent transition duration-100 -mb-px px-2 py-2","hover:border-tremor-content hover:text-tremor-content-emphasis text-tremor-content","dark:hover:border-dark-tremor-content-emphasis dark:hover:text-dark-tremor-content-emphasis dark:text-dark-tremor-content",t?(0,s.bM)(t,i.K.border).selectBorderColor:"ui-selected:border-tremor-brand dark:ui-selected:border-dark-tremor-brand");case"solid":return(0,o.q)("border-transparent border rounded-tremor-small px-2.5 py-1","ui-selected:border-tremor-border ui-selected:bg-tremor-background ui-selected:shadow-tremor-input hover:text-tremor-content-emphasis ui-selected:text-tremor-brand","dark:ui-selected:border-dark-tremor-border dark:ui-selected:bg-dark-tremor-background dark:ui-selected:shadow-dark-tremor-input dark:hover:text-dark-tremor-content-emphasis dark:ui-selected:text-dark-tremor-brand",t?(0,s.bM)(t,i.K.text).selectTextColor:"text-tremor-content dark:text-dark-tremor-content")}}(b,f),p)},m),n?l.createElement(n,{className:(0,o.q)(d("icon"),"flex-none h-5 w-5",g?"mr-2":"")}):null,g?l.createElement("span",null,g):null)});p.displayName="Tab"},18135:function(e,t,n){"use strict";n.d(t,{Z:function(){return c}});var a=n(5853),r=n(31492),i=n(65954),o=n(1153),s=n(2265);let l=(0,o.fn)("TabGroup"),c=s.forwardRef((e,t)=>{let{defaultIndex:n,index:o,onIndexChange:c,children:u,className:d}=e,p=(0,a._T)(e,["defaultIndex","index","onIndexChange","children","className"]);return s.createElement(r.O.Group,Object.assign({as:"div",ref:t,defaultIndex:n,selectedIndex:o,onChange:c,className:(0,i.q)(l("root"),"w-full",d)},p),u)});c.displayName="TabGroup"},35242:function(e,t,n){"use strict";n.d(t,{O:function(){return c},Z:function(){return d}});var a=n(5853),r=n(2265),i=n(42698);n(64016),n(8710),n(33232);var o=n(31492),s=n(65954);let l=(0,n(1153).fn)("TabList"),c=(0,r.createContext)("line"),u={line:(0,s.q)("flex border-b 
space-x-4","border-tremor-border","dark:border-dark-tremor-border"),solid:(0,s.q)("inline-flex p-0.5 rounded-tremor-default space-x-1.5","bg-tremor-background-subtle","dark:bg-dark-tremor-background-subtle")},d=r.forwardRef((e,t)=>{let{color:n,variant:d="line",children:p,className:g}=e,m=(0,a._T)(e,["color","variant","children","className"]);return r.createElement(o.O.List,Object.assign({ref:t,className:(0,s.q)(l("root"),"justify-start overflow-x-clip",u[d],g)},m),r.createElement(c.Provider,{value:d},r.createElement(i.Z.Provider,{value:n},p)))});d.displayName="TabList"},29706:function(e,t,n){"use strict";n.d(t,{Z:function(){return u}});var a=n(5853);n(42698);var r=n(64016);n(8710);var i=n(33232),o=n(65954),s=n(1153),l=n(2265);let c=(0,s.fn)("TabPanel"),u=l.forwardRef((e,t)=>{let{children:n,className:s}=e,u=(0,a._T)(e,["children","className"]),{selectedValue:d}=(0,l.useContext)(i.Z),p=d===(0,l.useContext)(r.Z);return l.createElement("div",Object.assign({ref:t,className:(0,o.q)(c("root"),"w-full mt-2",p?"":"hidden",s),"aria-selected":p?"true":"false"},u),n)});u.displayName="TabPanel"},77991:function(e,t,n){"use strict";n.d(t,{Z:function(){return d}});var a=n(5853),r=n(31492);n(42698);var i=n(64016);n(8710);var o=n(33232),s=n(65954),l=n(1153),c=n(2265);let u=(0,l.fn)("TabPanels"),d=c.forwardRef((e,t)=>{let{children:n,className:l}=e,d=(0,a._T)(e,["children","className"]);return c.createElement(r.O.Panels,Object.assign({as:"div",ref:t,className:(0,s.q)(u("root"),"w-full",l)},d),e=>{let{selectedIndex:t}=e;return c.createElement(o.Z.Provider,{value:{selectedValue:t}},c.Children.map(n,(e,t)=>c.createElement(i.Z.Provider,{value:t},e)))})});d.displayName="TabPanels"},42698:function(e,t,n){"use strict";n.d(t,{Z:function(){return i}});var a=n(2265),r=n(7084);n(65954);let i=(0,a.createContext)(r.fr.Blue)},64016:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)(0)},8710:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)(void 0)},33232:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)({selectedValue:void 0,handleValueChange:void 0})},93942:function(e,t,n){"use strict";n.d(t,{i:function(){return s}});var a=n(2265),r=n(50506),i=n(13959),o=n(71744);function s(e){return t=>a.createElement(i.ZP,{theme:{token:{motion:!1,zIndexPopupBase:0}}},a.createElement(e,Object.assign({},t)))}t.Z=(e,t,n,i)=>s(s=>{let{prefixCls:l,style:c}=s,u=a.useRef(null),[d,p]=a.useState(0),[g,m]=a.useState(0),[b,f]=(0,r.Z)(!1,{value:s.open}),{getPrefixCls:E}=a.useContext(o.E_),h=E(t||"select",l);a.useEffect(()=>{if(f(!0),"undefined"!=typeof ResizeObserver){let e=new ResizeObserver(e=>{let t=e[0].target;p(t.offsetHeight+8),m(t.offsetWidth)}),t=setInterval(()=>{var a;let r=n?".".concat(n(h)):".".concat(h,"-dropdown"),i=null===(a=u.current)||void 0===a?void 0:a.querySelector(r);i&&(clearInterval(t),e.observe(i))},10);return()=>{clearInterval(t),e.disconnect()}}},[]);let S=Object.assign(Object.assign({},s),{style:Object.assign(Object.assign({},c),{margin:0}),open:b,visible:b,getPopupContainer:()=>u.current});return i&&(S=i(S)),a.createElement("div",{ref:u,style:{paddingBottom:d,position:"relative",minWidth:g}},a.createElement(e,Object.assign({},S)))})},51369:function(e,t,n){"use strict";let a;n.d(t,{Z:function(){return eY}});var 
r=n(83145),i=n(2265),o=n(18404),s=n(71744),l=n(13959),c=n(8900),u=n(39725),d=n(54537),p=n(55726),g=n(36760),m=n.n(g),b=n(62236),f=n(68710),E=n(55274),h=n(29961),S=n(69819),y=n(73002),T=n(51248),A=e=>{let{type:t,children:n,prefixCls:a,buttonProps:r,close:o,autoFocus:s,emitEvent:l,isSilent:c,quitOnNullishReturnValue:u,actionFn:d}=e,p=i.useRef(!1),g=i.useRef(null),[m,b]=(0,S.Z)(!1),f=function(){null==o||o.apply(void 0,arguments)};i.useEffect(()=>{let e=null;return s&&(e=setTimeout(()=>{var e;null===(e=g.current)||void 0===e||e.focus()})),()=>{e&&clearTimeout(e)}},[]);let E=e=>{e&&e.then&&(b(!0),e.then(function(){b(!1,!0),f.apply(void 0,arguments),p.current=!1},e=>{if(b(!1,!0),p.current=!1,null==c||!c())return Promise.reject(e)}))};return i.createElement(y.ZP,Object.assign({},(0,T.nx)(t),{onClick:e=>{let t;if(!p.current){if(p.current=!0,!d){f();return}if(l){var n;if(t=d(e),u&&!((n=t)&&n.then)){p.current=!1,f(e);return}}else if(d.length)t=d(o),p.current=!1;else if(!(t=d())){f();return}E(t)}},loading:m,prefixCls:a},r,{ref:g}),n)};let R=i.createContext({}),{Provider:I}=R;var N=()=>{let{autoFocusButton:e,cancelButtonProps:t,cancelTextLocale:n,isSilent:a,mergedOkCancel:r,rootPrefixCls:o,close:s,onCancel:l,onConfirm:c}=(0,i.useContext)(R);return r?i.createElement(A,{isSilent:a,actionFn:l,close:function(){null==s||s.apply(void 0,arguments),null==c||c(!1)},autoFocus:"cancel"===e,buttonProps:t,prefixCls:"".concat(o,"-btn")},n):null},_=()=>{let{autoFocusButton:e,close:t,isSilent:n,okButtonProps:a,rootPrefixCls:r,okTextLocale:o,okType:s,onConfirm:l,onOk:c}=(0,i.useContext)(R);return i.createElement(A,{isSilent:n,type:s||"primary",actionFn:c,close:function(){null==t||t.apply(void 0,arguments),null==l||l(!0)},autoFocus:"ok"===e,buttonProps:a,prefixCls:"".concat(r,"-btn")},o)},v=n(49638),w=n(1119),k=n(26365),C=n(28036),O=i.createContext({}),x=n(31686),L=n(2161),D=n(92491),P=n(95814),M=n(18242);function F(e,t,n){var a=t;return!a&&n&&(a="".concat(e,"-").concat(n)),a}function U(e,t){var n=e["page".concat(t?"Y":"X","Offset")],a="scroll".concat(t?"Top":"Left");if("number"!=typeof n){var r=e.document;"number"!=typeof(n=r.documentElement[a])&&(n=r.body[a])}return n}var B=n(47970),G=n(28791),$=i.memo(function(e){return e.children},function(e,t){return!t.shouldUpdate}),H={width:0,height:0,overflow:"hidden",outline:"none"},z=i.forwardRef(function(e,t){var n,a,r,o=e.prefixCls,s=e.className,l=e.style,c=e.title,u=e.ariaId,d=e.footer,p=e.closable,g=e.closeIcon,b=e.onClose,f=e.children,E=e.bodyStyle,h=e.bodyProps,S=e.modalRender,y=e.onMouseDown,T=e.onMouseUp,A=e.holderRef,R=e.visible,I=e.forceRender,N=e.width,_=e.height,v=e.classNames,k=e.styles,C=i.useContext(O).panel,L=(0,G.x1)(A,C),D=(0,i.useRef)(),P=(0,i.useRef)();i.useImperativeHandle(t,function(){return{focus:function(){var e;null===(e=D.current)||void 0===e||e.focus()},changeActive:function(e){var t=document.activeElement;e&&t===P.current?D.current.focus():e||t!==D.current||P.current.focus()}}});var M={};void 0!==N&&(M.width=N),void 0!==_&&(M.height=_),d&&(n=i.createElement("div",{className:m()("".concat(o,"-footer"),null==v?void 0:v.footer),style:(0,x.Z)({},null==k?void 0:k.footer)},d)),c&&(a=i.createElement("div",{className:m()("".concat(o,"-header"),null==v?void 0:v.header),style:(0,x.Z)({},null==k?void 0:k.header)},i.createElement("div",{className:"".concat(o,"-title"),id:u},c))),p&&(r=i.createElement("button",{type:"button",onClick:b,"aria-label":"Close",className:"".concat(o,"-close")},g||i.createElement("span",{className:"".concat(o,"-close-x")})));var 
F=i.createElement("div",{className:m()("".concat(o,"-content"),null==v?void 0:v.content),style:null==k?void 0:k.content},r,a,i.createElement("div",(0,w.Z)({className:m()("".concat(o,"-body"),null==v?void 0:v.body),style:(0,x.Z)((0,x.Z)({},E),null==k?void 0:k.body)},h),f),n);return i.createElement("div",{key:"dialog-element",role:"dialog","aria-labelledby":c?u:null,"aria-modal":"true",ref:L,style:(0,x.Z)((0,x.Z)({},l),M),className:m()(o,s),onMouseDown:y,onMouseUp:T},i.createElement("div",{tabIndex:0,ref:D,style:H,"aria-hidden":"true"}),i.createElement($,{shouldUpdate:R||I},S?S(F):F),i.createElement("div",{tabIndex:0,ref:P,style:H,"aria-hidden":"true"}))}),j=i.forwardRef(function(e,t){var n=e.prefixCls,a=e.title,r=e.style,o=e.className,s=e.visible,l=e.forceRender,c=e.destroyOnClose,u=e.motionName,d=e.ariaId,p=e.onVisibleChanged,g=e.mousePosition,b=(0,i.useRef)(),f=i.useState(),E=(0,k.Z)(f,2),h=E[0],S=E[1],y={};function T(){var e,t,n,a,r,i=(n={left:(t=(e=b.current).getBoundingClientRect()).left,top:t.top},r=(a=e.ownerDocument).defaultView||a.parentWindow,n.left+=U(r),n.top+=U(r,!0),n);S(g?"".concat(g.x-i.left,"px ").concat(g.y-i.top,"px"):"")}return h&&(y.transformOrigin=h),i.createElement(B.ZP,{visible:s,onVisibleChanged:p,onAppearPrepare:T,onEnterPrepare:T,forceRender:l,motionName:u,removeOnLeave:c,ref:b},function(s,l){var c=s.className,u=s.style;return i.createElement(z,(0,w.Z)({},e,{ref:t,title:a,ariaId:d,prefixCls:n,holderRef:l,style:(0,x.Z)((0,x.Z)((0,x.Z)({},u),r),y),className:m()(o,c)}))})});function V(e){var t=e.prefixCls,n=e.style,a=e.visible,r=e.maskProps,o=e.motionName,s=e.className;return i.createElement(B.ZP,{key:"mask",visible:a,motionName:o,leavedClassName:"".concat(t,"-mask-hidden")},function(e,a){var o=e.className,l=e.style;return i.createElement("div",(0,w.Z)({ref:a,style:(0,x.Z)((0,x.Z)({},l),n),className:m()("".concat(t,"-mask"),o,s)},r))})}function W(e){var t=e.prefixCls,n=void 0===t?"rc-dialog":t,a=e.zIndex,r=e.visible,o=void 0!==r&&r,s=e.keyboard,l=void 0===s||s,c=e.focusTriggerAfterClose,u=void 0===c||c,d=e.wrapStyle,p=e.wrapClassName,g=e.wrapProps,b=e.onClose,f=e.afterOpenChange,E=e.afterClose,h=e.transitionName,S=e.animation,y=e.closable,T=e.mask,A=void 0===T||T,R=e.maskTransitionName,I=e.maskAnimation,N=e.maskClosable,_=e.maskStyle,v=e.maskProps,C=e.rootClassName,O=e.classNames,U=e.styles,B=(0,i.useRef)(),G=(0,i.useRef)(),$=(0,i.useRef)(),H=i.useState(o),z=(0,k.Z)(H,2),W=z[0],q=z[1],Y=(0,D.Z)();function K(e){null==b||b(e)}var Z=(0,i.useRef)(!1),X=(0,i.useRef)(),Q=null;return(void 0===N||N)&&(Q=function(e){Z.current?Z.current=!1:G.current===e.target&&K(e)}),(0,i.useEffect)(function(){o&&(q(!0),(0,L.Z)(G.current,document.activeElement)||(B.current=document.activeElement))},[o]),(0,i.useEffect)(function(){return function(){clearTimeout(X.current)}},[]),i.createElement("div",(0,w.Z)({className:m()("".concat(n,"-root"),C)},(0,M.Z)(e,{data:!0})),i.createElement(V,{prefixCls:n,visible:A&&o,motionName:F(n,R,I),style:(0,x.Z)((0,x.Z)({zIndex:a},_),null==U?void 0:U.mask),maskProps:v,className:null==O?void 0:O.mask}),i.createElement("div",(0,w.Z)({tabIndex:-1,onKeyDown:function(e){if(l&&e.keyCode===P.Z.ESC){e.stopPropagation(),K(e);return}o&&e.keyCode===P.Z.TAB&&$.current.changeActive(!e.shiftKey)},className:m()("".concat(n,"-wrap"),p,null==O?void 0:O.wrapper),ref:G,onClick:Q,style:(0,x.Z)((0,x.Z)((0,x.Z)({zIndex:a},d),null==U?void 
0:U.wrapper),{},{display:W?null:"none"})},g),i.createElement(j,(0,w.Z)({},e,{onMouseDown:function(){clearTimeout(X.current),Z.current=!0},onMouseUp:function(){X.current=setTimeout(function(){Z.current=!1})},ref:$,closable:void 0===y||y,ariaId:Y,prefixCls:n,visible:o&&W,onClose:K,onVisibleChanged:function(e){if(e)!function(){if(!(0,L.Z)(G.current,document.activeElement)){var e;null===(e=$.current)||void 0===e||e.focus()}}();else{if(q(!1),A&&B.current&&u){try{B.current.focus({preventScroll:!0})}catch(e){}B.current=null}W&&(null==E||E())}null==f||f(e)},motionName:F(n,h,S)}))))}j.displayName="Content",n(32559);var q=function(e){var t=e.visible,n=e.getContainer,a=e.forceRender,r=e.destroyOnClose,o=void 0!==r&&r,s=e.afterClose,l=e.panelRef,c=i.useState(t),u=(0,k.Z)(c,2),d=u[0],p=u[1],g=i.useMemo(function(){return{panel:l}},[l]);return(i.useEffect(function(){t&&p(!0)},[t]),a||!o||d)?i.createElement(O.Provider,{value:g},i.createElement(C.Z,{open:t||a||d,autoDestroy:!1,getContainer:n,autoLock:t||d},i.createElement(W,(0,w.Z)({},e,{destroyOnClose:o,afterClose:function(){null==s||s(),p(!1)}})))):null};q.displayName="Dialog";var Y=function(e,t,n){let a=arguments.length>3&&void 0!==arguments[3]?arguments[3]:i.createElement(v.Z,null),r=arguments.length>4&&void 0!==arguments[4]&&arguments[4];if("boolean"==typeof e?!e:void 0===t?!r:!1===t||null===t)return[!1,null];let o="boolean"==typeof t||null==t?a:t;return[!0,n?n(o):o]},K=n(94981),Z=n(95140),X=n(39109),Q=n(65658),J=n(74126);function ee(){}let et=i.createContext({add:ee,remove:ee});var en=n(86586),ea=()=>{let{cancelButtonProps:e,cancelTextLocale:t,onCancel:n}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({onClick:n},e),t)},er=()=>{let{confirmLoading:e,okButtonProps:t,okType:n,okTextLocale:a,onOk:r}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({},(0,T.nx)(n),{loading:e,onClick:r},t),a)},ei=n(92246);function eo(e,t){return i.createElement("span",{className:"".concat(e,"-close-x")},t||i.createElement(v.Z,{className:"".concat(e,"-close-icon")}))}let es=e=>{let t;let{okText:n,okType:a="primary",cancelText:o,confirmLoading:s,onOk:l,onCancel:c,okButtonProps:u,cancelButtonProps:d,footer:p}=e,[g]=(0,E.Z)("Modal",(0,ei.A)()),m={confirmLoading:s,okButtonProps:u,cancelButtonProps:d,okTextLocale:n||(null==g?void 0:g.okText),cancelTextLocale:o||(null==g?void 0:g.cancelText),okType:a,onOk:l,onCancel:c},b=i.useMemo(()=>m,(0,r.Z)(Object.values(m)));return"function"==typeof p||void 0===p?(t=i.createElement(i.Fragment,null,i.createElement(ea,null),i.createElement(er,null)),"function"==typeof p&&(t=p(t,{OkBtn:er,CancelBtn:ea})),t=i.createElement(I,{value:b},t)):t=p,i.createElement(en.n,{disabled:!1},t)};var el=n(12918),ec=n(11699),eu=n(691),ed=n(3104),ep=n(80669),eg=n(352);function em(e){return{position:e,inset:0}}let eb=e=>{let{componentCls:t,antCls:n}=e;return[{["".concat(t,"-root")]:{["".concat(t).concat(n,"-zoom-enter, ").concat(t).concat(n,"-zoom-appear")]:{transform:"none",opacity:0,animationDuration:e.motionDurationSlow,userSelect:"none"},["".concat(t).concat(n,"-zoom-leave ").concat(t,"-content")]:{pointerEvents:"none"},["".concat(t,"-mask")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,height:"100%",backgroundColor:e.colorBgMask,pointerEvents:"none",["".concat(t,"-hidden")]:{display:"none"}}),["".concat(t,"-wrap")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,overflow:"auto",outline:0,WebkitOverflowScrolling:"touch",["&:has(".concat(t).concat(n,"-zoom-enter), 
&:has(").concat(t).concat(n,"-zoom-appear)")]:{pointerEvents:"none"}})}},{["".concat(t,"-root")]:(0,ec.J$)(e)}]},ef=e=>{let{componentCls:t}=e;return[{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl"},["".concat(t,"-centered")]:{textAlign:"center","&::before":{display:"inline-block",width:0,height:"100%",verticalAlign:"middle",content:'""'},[t]:{top:0,display:"inline-block",paddingBottom:0,textAlign:"start",verticalAlign:"middle"}},["@media (max-width: ".concat(e.screenSMMax,"px)")]:{[t]:{maxWidth:"calc(100vw - 16px)",margin:"".concat((0,eg.bf)(e.marginXS)," auto")},["".concat(t,"-centered")]:{[t]:{flex:1}}}}},{[t]:Object.assign(Object.assign({},(0,el.Wf)(e)),{pointerEvents:"none",position:"relative",top:100,width:"auto",maxWidth:"calc(100vw - ".concat((0,eg.bf)(e.calc(e.margin).mul(2).equal()),")"),margin:"0 auto",paddingBottom:e.paddingLG,["".concat(t,"-title")]:{margin:0,color:e.titleColor,fontWeight:e.fontWeightStrong,fontSize:e.titleFontSize,lineHeight:e.titleLineHeight,wordWrap:"break-word"},["".concat(t,"-content")]:{position:"relative",backgroundColor:e.contentBg,backgroundClip:"padding-box",border:0,borderRadius:e.borderRadiusLG,boxShadow:e.boxShadow,pointerEvents:"auto",padding:e.contentPadding},["".concat(t,"-close")]:Object.assign({position:"absolute",top:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),insetInlineEnd:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),zIndex:e.calc(e.zIndexPopupBase).add(10).equal(),padding:0,color:e.modalCloseIconColor,fontWeight:e.fontWeightStrong,lineHeight:1,textDecoration:"none",background:"transparent",borderRadius:e.borderRadiusSM,width:e.modalCloseBtnSize,height:e.modalCloseBtnSize,border:0,outline:0,cursor:"pointer",transition:"color ".concat(e.motionDurationMid,", background-color ").concat(e.motionDurationMid),"&-x":{display:"flex",fontSize:e.fontSizeLG,fontStyle:"normal",lineHeight:"".concat((0,eg.bf)(e.modalCloseBtnSize)),justifyContent:"center",textTransform:"none",textRendering:"auto"},"&:hover":{color:e.modalIconHoverColor,backgroundColor:e.closeBtnHoverBg,textDecoration:"none"},"&:active":{backgroundColor:e.closeBtnActiveBg}},(0,el.Qy)(e)),["".concat(t,"-header")]:{color:e.colorText,background:e.headerBg,borderRadius:"".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)," 0 0"),marginBottom:e.headerMarginBottom,padding:e.headerPadding,borderBottom:e.headerBorderBottom},["".concat(t,"-body")]:{fontSize:e.fontSize,lineHeight:e.lineHeight,wordWrap:"break-word",padding:e.bodyPadding},["".concat(t,"-footer")]:{textAlign:"end",background:e.footerBg,marginTop:e.footerMarginTop,padding:e.footerPadding,borderTop:e.footerBorderTop,borderRadius:e.footerBorderRadius,["> ".concat(e.antCls,"-btn + ").concat(e.antCls,"-btn")]:{marginInlineStart:e.marginXS}},["".concat(t,"-open")]:{overflow:"hidden"}})},{["".concat(t,"-pure-panel")]:{top:"auto",padding:0,display:"flex",flexDirection:"column",["".concat(t,"-content,\n ").concat(t,"-body,\n ").concat(t,"-confirm-body-wrapper")]:{display:"flex",flexDirection:"column",flex:"auto"},["".concat(t,"-confirm-body")]:{marginBottom:"auto"}}}]},eE=e=>{let{componentCls:t}=e;return{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl",["".concat(t,"-confirm-body")]:{direction:"rtl"}}}}},eh=e=>{let 
t=e.padding,n=e.fontSizeHeading5,a=e.lineHeightHeading5;return(0,ed.TS)(e,{modalHeaderHeight:e.calc(e.calc(a).mul(n).equal()).add(e.calc(t).mul(2).equal()).equal(),modalFooterBorderColorSplit:e.colorSplit,modalFooterBorderStyle:e.lineType,modalFooterBorderWidth:e.lineWidth,modalIconHoverColor:e.colorIconHover,modalCloseIconColor:e.colorIcon,modalCloseBtnSize:e.fontHeight,modalConfirmIconSize:e.fontHeight,modalTitleHeight:e.calc(e.titleFontSize).mul(e.titleLineHeight).equal()})},eS=e=>({footerBg:"transparent",headerBg:e.colorBgElevated,titleLineHeight:e.lineHeightHeading5,titleFontSize:e.fontSizeHeading5,contentBg:e.colorBgElevated,titleColor:e.colorTextHeading,closeBtnHoverBg:e.wireframe?"transparent":e.colorFillContent,closeBtnActiveBg:e.wireframe?"transparent":e.colorFillContentHover,contentPadding:e.wireframe?0:"".concat((0,eg.bf)(e.paddingMD)," ").concat((0,eg.bf)(e.paddingContentHorizontalLG)),headerPadding:e.wireframe?"".concat((0,eg.bf)(e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,headerBorderBottom:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",headerMarginBottom:e.wireframe?0:e.marginXS,bodyPadding:e.wireframe?e.paddingLG:0,footerPadding:e.wireframe?"".concat((0,eg.bf)(e.paddingXS)," ").concat((0,eg.bf)(e.padding)):0,footerBorderTop:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",footerBorderRadius:e.wireframe?"0 0 ".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)):0,footerMarginTop:e.wireframe?0:e.marginSM,confirmBodyPadding:e.wireframe?"".concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,confirmIconMarginInlineEnd:e.wireframe?e.margin:e.marginSM,confirmBtnsMarginTop:e.wireframe?e.marginLG:e.marginSM});var ey=(0,ep.I$)("Modal",e=>{let t=eh(e);return[ef(t),eE(t),eb(t),(0,eu._y)(t,"zoom")]},eS,{unitless:{titleLineHeight:!0}}),eT=n(64024),eA=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};(0,K.Z)()&&window.document.documentElement&&document.documentElement.addEventListener("click",e=>{a={x:e.pageX,y:e.pageY},setTimeout(()=>{a=null},100)},!0);var eR=e=>{var t;let{getPopupContainer:n,getPrefixCls:r,direction:o,modal:l}=i.useContext(s.E_),c=t=>{let{onCancel:n}=e;null==n||n(t)},{prefixCls:u,className:d,rootClassName:p,open:g,wrapClassName:E,centered:h,getContainer:S,closeIcon:y,closable:T,focusTriggerAfterClose:A=!0,style:R,visible:I,width:N=520,footer:_,classNames:w,styles:k}=e,C=eA(e,["prefixCls","className","rootClassName","open","wrapClassName","centered","getContainer","closeIcon","closable","focusTriggerAfterClose","style","visible","width","footer","classNames","styles"]),O=r("modal",u),x=r(),L=(0,eT.Z)(O),[D,P,M]=ey(O,L),F=m()(E,{["".concat(O,"-centered")]:!!h,["".concat(O,"-wrap-rtl")]:"rtl"===o}),U=null!==_&&i.createElement(es,Object.assign({},e,{onOk:t=>{let{onOk:n}=e;null==n||n(t)},onCancel:c})),[B,G]=Y(T,y,e=>eo(O,e),i.createElement(v.Z,{className:"".concat(O,"-close-icon")}),!0),$=function(e){let t=i.useContext(et),n=i.useRef();return(0,J.zX)(a=>{if(a){let r=e?a.querySelector(e):a;t.add(r),n.current=r}else t.remove(n.current)})}(".".concat(O,"-content")),[H,z]=(0,b.Cn)("Modal",C.zIndex);return 
D(i.createElement(Q.BR,null,i.createElement(X.Ux,{status:!0,override:!0},i.createElement(Z.Z.Provider,{value:z},i.createElement(q,Object.assign({width:N},C,{zIndex:H,getContainer:void 0===S?n:S,prefixCls:O,rootClassName:m()(P,p,M,L),footer:U,visible:null!=g?g:I,mousePosition:null!==(t=C.mousePosition)&&void 0!==t?t:a,onClose:c,closable:B,closeIcon:G,focusTriggerAfterClose:A,transitionName:(0,f.m)(x,"zoom",e.transitionName),maskTransitionName:(0,f.m)(x,"fade",e.maskTransitionName),className:m()(P,d,null==l?void 0:l.className),style:Object.assign(Object.assign({},null==l?void 0:l.style),R),classNames:Object.assign(Object.assign({wrapper:F},null==l?void 0:l.classNames),w),styles:Object.assign(Object.assign({},null==l?void 0:l.styles),k),panelRef:$}))))))};let eI=e=>{let{componentCls:t,titleFontSize:n,titleLineHeight:a,modalConfirmIconSize:r,fontSize:i,lineHeight:o,modalTitleHeight:s,fontHeight:l,confirmBodyPadding:c}=e,u="".concat(t,"-confirm");return{[u]:{"&-rtl":{direction:"rtl"},["".concat(e.antCls,"-modal-header")]:{display:"none"},["".concat(u,"-body-wrapper")]:Object.assign({},(0,el.dF)()),["&".concat(t," ").concat(t,"-body")]:{padding:c},["".concat(u,"-body")]:{display:"flex",flexWrap:"nowrap",alignItems:"start",["> ".concat(e.iconCls)]:{flex:"none",fontSize:r,marginInlineEnd:e.confirmIconMarginInlineEnd,marginTop:e.calc(e.calc(l).sub(r).equal()).div(2).equal()},["&-has-title > ".concat(e.iconCls)]:{marginTop:e.calc(e.calc(s).sub(r).equal()).div(2).equal()}},["".concat(u,"-paragraph")]:{display:"flex",flexDirection:"column",flex:"auto",rowGap:e.marginXS,maxWidth:"calc(100% - ".concat((0,eg.bf)(e.calc(e.modalConfirmIconSize).add(e.marginSM).equal()),")")},["".concat(u,"-title")]:{color:e.colorTextHeading,fontWeight:e.fontWeightStrong,fontSize:n,lineHeight:a},["".concat(u,"-content")]:{color:e.colorText,fontSize:i,lineHeight:o},["".concat(u,"-btns")]:{textAlign:"end",marginTop:e.confirmBtnsMarginTop,["".concat(e.antCls,"-btn + ").concat(e.antCls,"-btn")]:{marginBottom:0,marginInlineStart:e.marginXS}}},["".concat(u,"-error ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorError},["".concat(u,"-warning ").concat(u,"-body > ").concat(e.iconCls,",\n ").concat(u,"-confirm ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorWarning},["".concat(u,"-info ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorInfo},["".concat(u,"-success ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorSuccess}}};var eN=(0,ep.bk)(["Modal","confirm"],e=>[eI(eh(e))],eS,{order:-1e3}),e_=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};function ev(e){let{prefixCls:t,icon:n,okText:a,cancelText:o,confirmPrefixCls:s,type:l,okCancel:g,footer:b,locale:f}=e,h=e_(e,["prefixCls","icon","okText","cancelText","confirmPrefixCls","type","okCancel","footer","locale"]),S=n;if(!n&&null!==n)switch(l){case"info":S=i.createElement(p.Z,null);break;case"success":S=i.createElement(c.Z,null);break;case"error":S=i.createElement(u.Z,null);break;default:S=i.createElement(d.Z,null)}let y=null!=g?g:"confirm"===l,T=null!==e.autoFocusButton&&(e.autoFocusButton||"ok"),[A]=(0,E.Z)("Modal"),R=f||A,v=a||(y?null==R?void 0:R.okText:null==R?void 0:R.justOkText),w=Object.assign({autoFocusButton:T,cancelTextLocale:o||(null==R?void 
0:R.cancelText),okTextLocale:v,mergedOkCancel:y},h),k=i.useMemo(()=>w,(0,r.Z)(Object.values(w))),C=i.createElement(i.Fragment,null,i.createElement(N,null),i.createElement(_,null)),O=void 0!==e.title&&null!==e.title,x="".concat(s,"-body");return i.createElement("div",{className:"".concat(s,"-body-wrapper")},i.createElement("div",{className:m()(x,{["".concat(x,"-has-title")]:O})},S,i.createElement("div",{className:"".concat(s,"-paragraph")},O&&i.createElement("span",{className:"".concat(s,"-title")},e.title),i.createElement("div",{className:"".concat(s,"-content")},e.content))),void 0===b||"function"==typeof b?i.createElement(I,{value:k},i.createElement("div",{className:"".concat(s,"-btns")},"function"==typeof b?b(C,{OkBtn:_,CancelBtn:N}):C)):b,i.createElement(eN,{prefixCls:t}))}let ew=e=>{let{close:t,zIndex:n,afterClose:a,open:r,keyboard:o,centered:s,getContainer:l,maskStyle:c,direction:u,prefixCls:d,wrapClassName:p,rootPrefixCls:g,bodyStyle:E,closable:S=!1,closeIcon:y,modalRender:T,focusTriggerAfterClose:A,onConfirm:R,styles:I}=e,N="".concat(d,"-confirm"),_=e.width||416,v=e.style||{},w=void 0===e.mask||e.mask,k=void 0!==e.maskClosable&&e.maskClosable,C=m()(N,"".concat(N,"-").concat(e.type),{["".concat(N,"-rtl")]:"rtl"===u},e.className),[,O]=(0,h.ZP)(),x=i.useMemo(()=>void 0!==n?n:O.zIndexPopupBase+b.u6,[n,O]);return i.createElement(eR,{prefixCls:d,className:C,wrapClassName:m()({["".concat(N,"-centered")]:!!e.centered},p),onCancel:()=>{null==t||t({triggerCancel:!0}),null==R||R(!1)},open:r,title:"",footer:null,transitionName:(0,f.m)(g||"","zoom",e.transitionName),maskTransitionName:(0,f.m)(g||"","fade",e.maskTransitionName),mask:w,maskClosable:k,style:v,styles:Object.assign({body:E,mask:c},I),width:_,zIndex:x,afterClose:a,keyboard:o,centered:s,getContainer:l,closable:S,closeIcon:y,modalRender:T,focusTriggerAfterClose:A},i.createElement(ev,Object.assign({},e,{confirmPrefixCls:N})))};var ek=e=>{let{rootPrefixCls:t,iconPrefixCls:n,direction:a,theme:r}=e;return i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:n,direction:a,theme:r},i.createElement(ew,Object.assign({},e)))},eC=[];let eO="",ex=e=>{var t,n;let{prefixCls:a,getContainer:r,direction:o}=e,l=(0,ei.A)(),c=(0,i.useContext)(s.E_),u=eO||c.getPrefixCls(),d=a||"".concat(u,"-modal"),p=r;return!1===p&&(p=void 0),i.createElement(ek,Object.assign({},e,{rootPrefixCls:u,prefixCls:d,iconPrefixCls:c.iconPrefixCls,theme:c.theme,direction:null!=o?o:c.direction,locale:null!==(n=null===(t=c.locale)||void 0===t?void 0:t.Modal)&&void 0!==n?n:l,getContainer:p}))};function eL(e){let t;let n=(0,l.w6)(),a=document.createDocumentFragment(),s=Object.assign(Object.assign({},e),{close:d,open:!0});function c(){for(var t=arguments.length,n=Array(t),i=0;ie&&e.triggerCancel);e.onCancel&&s&&e.onCancel.apply(e,[()=>{}].concat((0,r.Z)(n.slice(1))));for(let e=0;e{let t=n.getPrefixCls(void 0,eO),r=n.getIconPrefixCls(),s=n.getTheme(),c=i.createElement(ex,Object.assign({},e));(0,o.s)(i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:r,theme:s},n.holderRender?n.holderRender(c):c),a)})}function d(){for(var t=arguments.length,n=Array(t),a=0;a{"function"==typeof e.afterClose&&e.afterClose(),c.apply(this,n)}})).visible&&delete s.visible,u(s)}return u(s),eC.push(d),{destroy:d,update:function(e){u(s="function"==typeof e?e(s):Object.assign(Object.assign({},s),e))}}}function eD(e){return Object.assign(Object.assign({},e),{type:"warning"})}function eP(e){return Object.assign(Object.assign({},e),{type:"info"})}function eM(e){return 
Object.assign(Object.assign({},e),{type:"success"})}function eF(e){return Object.assign(Object.assign({},e),{type:"error"})}function eU(e){return Object.assign(Object.assign({},e),{type:"confirm"})}var eB=n(93942),eG=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},e$=(0,eB.i)(e=>{let{prefixCls:t,className:n,closeIcon:a,closable:r,type:o,title:l,children:c,footer:u}=e,d=eG(e,["prefixCls","className","closeIcon","closable","type","title","children","footer"]),{getPrefixCls:p}=i.useContext(s.E_),g=p(),b=t||p("modal"),f=(0,eT.Z)(g),[E,h,S]=ey(b,f),y="".concat(b,"-confirm"),T={};return T=o?{closable:null!=r&&r,title:"",footer:"",children:i.createElement(ev,Object.assign({},e,{prefixCls:b,confirmPrefixCls:y,rootPrefixCls:g,content:c}))}:{closable:null==r||r,title:l,footer:null!==u&&i.createElement(es,Object.assign({},e)),children:c},E(i.createElement(z,Object.assign({prefixCls:b,className:m()(h,"".concat(b,"-pure-panel"),o&&y,o&&"".concat(y,"-").concat(o),n,S,f)},d,{closeIcon:eo(b,a),closable:r},T)))}),eH=n(13823),ez=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},ej=i.forwardRef((e,t)=>{var n,{afterClose:a,config:o}=e,l=ez(e,["afterClose","config"]);let[c,u]=i.useState(!0),[d,p]=i.useState(o),{direction:g,getPrefixCls:m}=i.useContext(s.E_),b=m("modal"),f=m(),h=function(){u(!1);for(var e=arguments.length,t=Array(e),n=0;ne&&e.triggerCancel);d.onCancel&&a&&d.onCancel.apply(d,[()=>{}].concat((0,r.Z)(t.slice(1))))};i.useImperativeHandle(t,()=>({destroy:h,update:e=>{p(t=>Object.assign(Object.assign({},t),e))}}));let S=null!==(n=d.okCancel)&&void 0!==n?n:"confirm"===d.type,[y]=(0,E.Z)("Modal",eH.Z.Modal);return i.createElement(ek,Object.assign({prefixCls:b,rootPrefixCls:f},d,{close:h,open:c,afterClose:()=>{var e;a(),null===(e=d.afterClose)||void 0===e||e.call(d)},okText:d.okText||(S?null==y?void 0:y.okText:null==y?void 0:y.justOkText),direction:d.direction||g,cancelText:d.cancelText||(null==y?void 0:y.cancelText)},l))});let eV=0,eW=i.memo(i.forwardRef((e,t)=>{let[n,a]=function(){let[e,t]=i.useState([]);return[e,i.useCallback(e=>(t(t=>[].concat((0,r.Z)(t),[e])),()=>{t(t=>t.filter(t=>t!==e))}),[])]}();return i.useImperativeHandle(t,()=>({patchElement:a}),[]),i.createElement(i.Fragment,null,n)}));function eq(e){return eL(eD(e))}eR.useModal=function(){let e=i.useRef(null),[t,n]=i.useState([]);i.useEffect(()=>{t.length&&((0,r.Z)(t).forEach(e=>{e()}),n([]))},[t]);let a=i.useCallback(t=>function(a){var o;let s,l;eV+=1;let c=i.createRef(),u=new Promise(e=>{s=e}),d=!1,p=i.createElement(ej,{key:"modal-".concat(eV),config:t(a),ref:c,afterClose:()=>{null==l||l()},isSilent:()=>d,onConfirm:e=>{s(e)}});return(l=null===(o=e.current)||void 0===o?void 0:o.patchElement(p))&&eC.push(l),{destroy:()=>{function e(){var e;null===(e=c.current)||void 0===e||e.destroy()}c.current?e():n(t=>[].concat((0,r.Z)(t),[e]))},update:e=>{function t(){var t;null===(t=c.current)||void 
0===t||t.update(e)}c.current?t():n(e=>[].concat((0,r.Z)(e),[t]))},then:e=>(d=!0,u.then(e))}},[]);return[i.useMemo(()=>({info:a(eP),success:a(eM),error:a(eF),warning:a(eD),confirm:a(eU)}),[]),i.createElement(eW,{key:"modal-holder",ref:e})]},eR.info=function(e){return eL(eP(e))},eR.success=function(e){return eL(eM(e))},eR.error=function(e){return eL(eF(e))},eR.warning=eq,eR.warn=eq,eR.confirm=function(e){return eL(eU(e))},eR.destroyAll=function(){for(;eC.length;){let e=eC.pop();e&&e()}},eR.config=function(e){let{rootPrefixCls:t}=e;eO=t},eR._InternalPanelDoNotUseOrYouWillBeFired=e$;var eY=eR},11699:function(e,t,n){"use strict";n.d(t,{J$:function(){return s}});var a=n(352),r=n(37133);let i=new a.E4("antFadeIn",{"0%":{opacity:0},"100%":{opacity:1}}),o=new a.E4("antFadeOut",{"0%":{opacity:1},"100%":{opacity:0}}),s=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],{antCls:n}=e,a="".concat(n,"-fade"),s=t?"&":"";return[(0,r.R)(a,i,o,e.motionDurationMid,t),{["\n ".concat(s).concat(a,"-enter,\n ").concat(s).concat(a,"-appear\n ")]:{opacity:0,animationTimingFunction:"linear"},["".concat(s).concat(a,"-leave")]:{animationTimingFunction:"linear"}}]}},26035:function(e){"use strict";e.exports=function(e,n){for(var a,r,i,o=e||"",s=n||"div",l={},c=0;c4&&m.slice(0,4)===o&&s.test(t)&&("-"===t.charAt(4)?b=o+(n=t.slice(5).replace(l,d)).charAt(0).toUpperCase()+n.slice(1):(g=(p=t).slice(4),t=l.test(g)?p:("-"!==(g=g.replace(c,u)).charAt(0)&&(g="-"+g),o+g)),f=r),new f(b,t))};var s=/^data[-\w.:]+$/i,l=/-[a-z]/g,c=/[A-Z]/g;function u(e){return"-"+e.toLowerCase()}function d(e){return e.charAt(1).toUpperCase()}},30466:function(e,t,n){"use strict";var a=n(82855),r=n(64541),i=n(80808),o=n(44987),s=n(72731),l=n(98946);e.exports=a([i,r,o,s,l])},72731:function(e,t,n){"use strict";var a=n(20321),r=n(41757),i=a.booleanish,o=a.number,s=a.spaceSeparated;e.exports=r({transform:function(e,t){return"role"===t?t:"aria-"+t.slice(4).toLowerCase()},properties:{ariaActiveDescendant:null,ariaAtomic:i,ariaAutoComplete:null,ariaBusy:i,ariaChecked:i,ariaColCount:o,ariaColIndex:o,ariaColSpan:o,ariaControls:s,ariaCurrent:null,ariaDescribedBy:s,ariaDetails:null,ariaDisabled:i,ariaDropEffect:s,ariaErrorMessage:null,ariaExpanded:i,ariaFlowTo:s,ariaGrabbed:i,ariaHasPopup:null,ariaHidden:i,ariaInvalid:null,ariaKeyShortcuts:null,ariaLabel:null,ariaLabelledBy:s,ariaLevel:o,ariaLive:null,ariaModal:i,ariaMultiLine:i,ariaMultiSelectable:i,ariaOrientation:null,ariaOwns:s,ariaPlaceholder:null,ariaPosInSet:o,ariaPressed:i,ariaReadOnly:i,ariaRelevant:null,ariaRequired:i,ariaRoleDescription:s,ariaRowCount:o,ariaRowIndex:o,ariaRowSpan:o,ariaSelected:i,ariaSetSize:o,ariaSort:null,ariaValueMax:o,ariaValueMin:o,ariaValueNow:o,ariaValueText:null,role:null}})},98946:function(e,t,n){"use strict";var 
a=n(20321),r=n(41757),i=n(53296),o=a.boolean,s=a.overloadedBoolean,l=a.booleanish,c=a.number,u=a.spaceSeparated,d=a.commaSeparated;e.exports=r({space:"html",attributes:{acceptcharset:"accept-charset",classname:"class",htmlfor:"for",httpequiv:"http-equiv"},transform:i,mustUseProperty:["checked","multiple","muted","selected"],properties:{abbr:null,accept:d,acceptCharset:u,accessKey:u,action:null,allow:null,allowFullScreen:o,allowPaymentRequest:o,allowUserMedia:o,alt:null,as:null,async:o,autoCapitalize:null,autoComplete:u,autoFocus:o,autoPlay:o,capture:o,charSet:null,checked:o,cite:null,className:u,cols:c,colSpan:null,content:null,contentEditable:l,controls:o,controlsList:u,coords:c|d,crossOrigin:null,data:null,dateTime:null,decoding:null,default:o,defer:o,dir:null,dirName:null,disabled:o,download:s,draggable:l,encType:null,enterKeyHint:null,form:null,formAction:null,formEncType:null,formMethod:null,formNoValidate:o,formTarget:null,headers:u,height:c,hidden:o,high:c,href:null,hrefLang:null,htmlFor:u,httpEquiv:u,id:null,imageSizes:null,imageSrcSet:d,inputMode:null,integrity:null,is:null,isMap:o,itemId:null,itemProp:u,itemRef:u,itemScope:o,itemType:u,kind:null,label:null,lang:null,language:null,list:null,loading:null,loop:o,low:c,manifest:null,max:null,maxLength:c,media:null,method:null,min:null,minLength:c,multiple:o,muted:o,name:null,nonce:null,noModule:o,noValidate:o,onAbort:null,onAfterPrint:null,onAuxClick:null,onBeforePrint:null,onBeforeUnload:null,onBlur:null,onCancel:null,onCanPlay:null,onCanPlayThrough:null,onChange:null,onClick:null,onClose:null,onContextMenu:null,onCopy:null,onCueChange:null,onCut:null,onDblClick:null,onDrag:null,onDragEnd:null,onDragEnter:null,onDragExit:null,onDragLeave:null,onDragOver:null,onDragStart:null,onDrop:null,onDurationChange:null,onEmptied:null,onEnded:null,onError:null,onFocus:null,onFormData:null,onHashChange:null,onInput:null,onInvalid:null,onKeyDown:null,onKeyPress:null,onKeyUp:null,onLanguageChange:null,onLoad:null,onLoadedData:null,onLoadedMetadata:null,onLoadEnd:null,onLoadStart:null,onMessage:null,onMessageError:null,onMouseDown:null,onMouseEnter:null,onMouseLeave:null,onMouseMove:null,onMouseOut:null,onMouseOver:null,onMouseUp:null,onOffline:null,onOnline:null,onPageHide:null,onPageShow:null,onPaste:null,onPause:null,onPlay:null,onPlaying:null,onPopState:null,onProgress:null,onRateChange:null,onRejectionHandled:null,onReset:null,onResize:null,onScroll:null,onSecurityPolicyViolation:null,onSeeked:null,onSeeking:null,onSelect:null,onSlotChange:null,onStalled:null,onStorage:null,onSubmit:null,onSuspend:null,onTimeUpdate:null,onToggle:null,onUnhandledRejection:null,onUnload:null,onVolumeChange:null,onWaiting:null,onWheel:null,open:o,optimum:c,pattern:null,ping:u,placeholder:null,playsInline:o,poster:null,preload:null,readOnly:o,referrerPolicy:null,rel:u,required:o,reversed:o,rows:c,rowSpan:c,sandbox:u,scope:null,scoped:o,seamless:o,selected:o,shape:null,size:c,sizes:null,slot:null,span:c,spellCheck:l,src:null,srcDoc:null,srcLang:null,srcSet:d,start:c,step:null,style:null,tabIndex:c,target:null,title:null,translate:null,type:null,typeMustMatch:o,useMap:null,value:l,width:c,wrap:null,align:null,aLink:null,archive:u,axis:null,background:null,bgColor:null,border:c,borderColor:null,bottomMargin:c,cellPadding:null,cellSpacing:null,char:null,charOff:null,classId:null,clear:null,code:null,codeBase:null,codeType:null,color:null,compact:o,declare:o,event:null,face:null,frame:null,frameBorder:null,hSpace:c,leftMargin:c,link:null,longDesc:null,lowSrc:null,margin
Height:c,marginWidth:c,noResize:o,noHref:o,noShade:o,noWrap:o,object:null,profile:null,prompt:null,rev:null,rightMargin:c,rules:null,scheme:null,scrolling:l,standby:null,summary:null,text:null,topMargin:c,valueType:null,version:null,vAlign:null,vLink:null,vSpace:c,allowTransparency:null,autoCorrect:null,autoSave:null,disablePictureInPicture:o,disableRemotePlayback:o,prefix:null,property:null,results:c,security:null,unselectable:null}})},53296:function(e,t,n){"use strict";var a=n(38781);e.exports=function(e,t){return a(e,t.toLowerCase())}},38781:function(e){"use strict";e.exports=function(e,t){return t in e?e[t]:t}},41757:function(e,t,n){"use strict";var a=n(96532),r=n(61723),i=n(51351);e.exports=function(e){var t,n,o=e.space,s=e.mustUseProperty||[],l=e.attributes||{},c=e.properties,u=e.transform,d={},p={};for(t in c)n=new i(t,u(l,t),c[t],o),-1!==s.indexOf(t)&&(n.mustUseProperty=!0),d[t]=n,p[a(t)]=t,p[a(n.attribute)]=t;return new r(d,p,o)}},51351:function(e,t,n){"use strict";var a=n(24192),r=n(20321);e.exports=s,s.prototype=new a,s.prototype.defined=!0;var i=["boolean","booleanish","overloadedBoolean","number","commaSeparated","spaceSeparated","commaOrSpaceSeparated"],o=i.length;function s(e,t,n,s){var l,c,u,d=-1;for(s&&(this.space=s),a.call(this,e,t);++d