diff --git a/.circleci/config.yml b/.circleci/config.yml index 0a12aa73b8..886e121f35 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,7 +49,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -168,7 +168,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -268,7 +268,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -513,7 +513,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -680,6 +680,50 @@ jobs: paths: - llm_translation_coverage.xml - llm_translation_coverage + mcp_testing: + docker: + - image: cimg/python:3.11 + auth: + username: ${DOCKERHUB_USERNAME} + password: ${DOCKERHUB_PASSWORD} + working_directory: ~/project + + steps: + - checkout + - run: + name: Install Dependencies + command: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-retry==1.6.3" + pip install "pytest-cov==5.0.0" + pip install "pytest-asyncio==0.21.1" + pip install "respx==0.21.1" + pip install "pydantic==2.7.2" + pip install "mcp==1.4.1" + # Run pytest and generate JUnit XML report + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/mcp_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m + - run: + name: Rename the coverage files + command: | + mv coverage.xml mcp_coverage.xml + mv .coverage mcp_coverage + + # Store test results + - store_test_results: + path: test-results + - persist_to_workspace: + root: . 
+ paths: + - mcp_coverage.xml + - mcp_coverage llm_responses_api_testing: docker: - image: cimg/python:3.11 @@ -744,6 +788,8 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install "respx==0.21.1" pip install "hypercorn==0.17.3" + pip install "pydantic==2.7.2" + pip install "mcp==1.4.1" # Run pytest and generate JUnit XML report - run: name: Run tests @@ -1278,7 +1324,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" - run: name: Install Grype command: | @@ -1353,7 +1399,7 @@ jobs: command: | pwd ls - python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests + python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests no_output_timeout: 120m # Store test results @@ -1414,7 +1460,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" # Run pytest and generate JUnit XML report - run: name: Build Docker image @@ -1536,7 +1582,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" - run: name: Build Docker image command: docker build -t my-app:latest -f ./docker/Dockerfile.database . @@ -1965,7 +2011,7 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install "google-cloud-aiplatform==1.43.0" pip install aiohttp - pip install "openai==1.66.1" + pip install "openai==1.68.2" pip install "assemblyai==0.37.0" python -m pip install --upgrade pip pip install "pydantic==2.7.1" @@ -2112,7 +2158,7 @@ jobs: python -m venv venv . 
venv/bin/activate pip install coverage - coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage + coverage combine llm_translation_coverage llm_responses_api_coverage mcp_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage coverage xml - codecov/upload: file: ./coverage.xml @@ -2241,7 +2287,7 @@ jobs: pip install "pytest-retry==1.6.3" pip install "pytest-asyncio==0.21.1" pip install aiohttp - pip install "openai==1.66.1" + pip install "openai==1.68.2" python -m pip install --upgrade pip pip install "pydantic==2.7.1" pip install "pytest==7.3.1" @@ -2473,6 +2519,12 @@ workflows: only: - main - /litellm_.*/ + - mcp_testing: + filters: + branches: + only: + - main + - /litellm_.*/ - llm_responses_api_testing: filters: branches: @@ -2518,6 +2570,7 @@ workflows: - upload-coverage: requires: - llm_translation_testing + - mcp_testing - llm_responses_api_testing - litellm_mapped_tests - batches_testing @@ -2577,6 +2630,7 @@ workflows: - load_testing - test_bad_database_url - llm_translation_testing + - mcp_testing - llm_responses_api_testing - litellm_mapped_tests - batches_testing diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt index e63fb9dd9a..356a9840f5 100644 --- a/.circleci/requirements.txt +++ b/.circleci/requirements.txt @@ -1,5 +1,5 @@ # used by CI/CD testing -openai==1.66.1 +openai==1.68.2 python-dotenv tiktoken importlib_metadata diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d50aefa8bb..6c887178d5 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -10,7 +10,7 @@ **Please complete all items before asking a LiteLLM maintainer to review your PR** -- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code) +- [ ] I have Added testing in the [`tests/litellm/`](https://github.com/BerriAI/litellm/tree/main/tests/litellm) directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code) - [ ] I have added a screenshot of my new test passing locally - [ ] My PR passes all unit tests on (`make test-unit`)[https://docs.litellm.ai/docs/extras/contributing_code] - [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem diff --git a/docs/my-website/docs/mcp.md b/docs/my-website/docs/mcp.md index 42489477cf..9f3343e9cd 100644 --- a/docs/my-website/docs/mcp.md +++ b/docs/my-website/docs/mcp.md @@ -1,14 +1,291 @@ -import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +import Image from '@theme/IdealImage'; + +# /mcp [BETA] - Model Context Protocol + +Use Model Context Protocol with LiteLLM + + + +
+  LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
-# /mcp Model Context Protocol [BETA] ## Overview -LiteLLM's MCP implementation allows you to define tools that can be called by any MCP compatible client. Define your `mcp_tools` with LiteLLM and all your clients can `list` and `call` available tools. +LiteLLM acts as an MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP: -## How it works +- **List** Available MCP Tools: OpenAI clients can view all available MCP tools + - `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools +- **Call** MCP Tools: OpenAI clients can call MCP tools + - `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server + + +## Usage + +### 1. List Available MCP Tools + +In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways: + +- `format="mcp"` - (default) Return MCP tools + - Returns: `mcp.types.Tool` +- `format="openai"` - Return MCP tools converted to OpenAI API compatible tools. Allows use with OpenAI endpoints. + - Returns: `openai.types.chat.ChatCompletionToolParam` + + + +```python title="MCP Client List Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import json +import os +import litellm +from litellm import experimental_mcp_client + + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) +``` + + + + + +In this example we'll walk through how you can use the OpenAI SDK pointed to the LiteLLM proxy to call MCP tools.
The key difference here is we use the OpenAI SDK to make the LLM API request + +```python title="MCP Client List Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from openai import OpenAI +from litellm import experimental_mcp_client + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools using litellm mcp client + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + # Use OpenAI SDK pointed to LiteLLM proxy + client = OpenAI( + api_key="your-api-key", # Your LiteLLM proxy API key + base_url="http://localhost:4000" # Your LiteLLM proxy URL + ) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("LLM RESPONSE: ", llm_response) +``` + + + + +### 2. List and Call MCP Tools + +In this example we'll use +- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server +- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server + +The first llm response returns a list of OpenAI tools. We take the first tool call from the LLM response and pass it to `litellm.experimental_mcp_client.call_openai_tool` to call the tool on the MCP server. + +#### How `litellm.experimental_mcp_client.call_openai_tool` works + +- Accepts an OpenAI Tool Call from the LLM response +- Converts the OpenAI Tool Call to an MCP Tool +- Calls the MCP Tool on the MCP server +- Returns the result of the MCP Tool call + + + + +```python title="MCP Client List and Call Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +import litellm +from litellm import experimental_mcp_client + + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) + + openai_tool = llm_response["choices"][0]["message"]["tool_calls"][0] + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=openai_tool, + ) + print("MCP TOOL CALL RESULT: ", call_result) + + # send the tool result to the LLM + messages.append(llm_response["choices"][0]["message"]) + messages.append( + { + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": openai_tool["id"], + } + ) + 
print("final messages with tool result: ", messages) + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print( + "FINAL LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str) + ) +``` + + + + +In this example we'll walk through how you can use the OpenAI SDK pointed to the LiteLLM proxy to call MCP tools. The key difference here is we use the OpenAI SDK to make the LLM API request + +```python title="MCP Client with OpenAI SDK" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from openai import OpenAI +from litellm import experimental_mcp_client + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools using litellm mcp client + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + # Use OpenAI SDK pointed to LiteLLM proxy + client = OpenAI( + api_key="your-api-key", # Your LiteLLM proxy API key + base_url="http://localhost:8000" # Your LiteLLM proxy URL + ) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("LLM RESPONSE: ", llm_response) + + # Get the first tool call + tool_call = llm_response.choices[0].message.tool_calls[0] + + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=tool_call.model_dump(), + ) + print("MCP TOOL CALL RESULT: ", call_result) + + # Send the tool result back to the LLM + messages.append(llm_response.choices[0].message.model_dump()) + messages.append({ + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": tool_call.id, + }) + + final_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("FINAL RESPONSE: ", final_response) +``` + + + + +## Upcoming Features + +:::info + +**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above. + +::: + + +### Expose MCP tools on LiteLLM Proxy Server + +This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools. + +#### How it works LiteLLM exposes the following MCP endpoints: @@ -25,9 +302,9 @@ When MCP clients connect to LiteLLM they can follow this workflow: 6. LiteLLM makes the tool calls to the appropriate handlers 7. LiteLLM returns the tool call results to the MCP client -## Quick Start +#### Usage -### 1. Define your tools on mcp_tools +#### 1. Define your tools on mcp_tools LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`). @@ -56,7 +333,7 @@ mcp_tools: handler: "mcp_tools.get_current_time" ``` -### 2. Define a handler for your tool +#### 2. Define a handler for your tool Create a new file called `mcp_tools.py` and add this code. 
The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called. @@ -82,7 +359,7 @@ def get_current_time(format: str = "short"): return current_time.strftime('%H:%M') ``` -### 3. Start LiteLLM Gateway +#### 3. Start LiteLLM Gateway @@ -114,7 +391,7 @@ litellm --config config.yaml --detailed_debug -### 3. Make an LLM API request +#### 4. Make an LLM API request @@ -161,11 +438,11 @@ if __name__ == "__main__": ``` -## Specification for `mcp_tools` +### Specification for `mcp_tools` The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients. -### Tool Definition Format +#### Tool Definition Format ```yaml mcp_tools: @@ -175,14 +452,14 @@ mcp_tools: handler: string # Required: Path to the function that implements the tool ``` -### Field Details +#### Field Details - `name`: A unique identifier for the tool - `description`: A clear description of what the tool does, used by LLMs to determine when to call it - `input_schema`: JSON Schema object defining the expected input parameters - `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name") -### Example Tool Definition +#### Example Tool Definition ```yaml mcp_tools: diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md index ce366af541..4918d889ed 100644 --- a/docs/my-website/docs/pass_through/vertex_ai.md +++ b/docs/my-website/docs/pass_through/vertex_ai.md @@ -15,6 +15,91 @@ Pass-through endpoints for Vertex AI - call provider-specific endpoint, in nativ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai` +LiteLLM supports 3 flows for calling Vertex AI endpoints via pass-through: + +1. **Specific Credentials**: Admin sets passthrough credentials for a specific project/region. + +2. **Default Credentials**: Admin sets default credentials. + +3. **Client-Side Credentials**: User can send client-side credentials through to Vertex AI (default behavior - if no default or mapped credentials are found, the request is passed through directly). + + +## Example Usage + + + + +```yaml +model_list: + - model_name: gemini-1.0-pro + litellm_params: + model: vertex_ai/gemini-1.0-pro + vertex_project: adroit-crow-413218 + vertex_region: us-central1 + vertex_credentials: /path/to/credentials.json + use_in_pass_through: true # 👈 KEY CHANGE +``` + + + + + + + +```yaml +default_vertex_config: + vertex_project: adroit-crow-413218 + vertex_region: us-central1 + vertex_credentials: /path/to/credentials.json +``` + + + +```bash +export DEFAULT_VERTEXAI_PROJECT="adroit-crow-413218" +export DEFAULT_VERTEXAI_LOCATION="us-central1" +export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json" +``` + + + + + + +Try Gemini 2.0 Flash (curl) + +``` +MODEL_ID="gemini-2.0-flash-001" +PROJECT_ID="YOUR_PROJECT_ID" +``` + +```bash +curl \ + -X POST \ + -H "Authorization: Bearer $(gcloud auth application-default print-access-token)" \ + -H "Content-Type: application/json" \ + "${LITELLM_PROXY_BASE_URL}/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:streamGenerateContent" -d \ + $'{ + "contents": { + "role": "user", + "parts": [ + { + "fileData": { + "mimeType": "image/png", + "fileUri": "gs://generativeai-downloads/images/scones.jpg" + } + }, + { + "text": "Describe this picture." 
+ } + ] + } + }' +``` + + + + #### **Example Usage** @@ -22,7 +107,7 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE ```bash -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -101,7 +186,7 @@ litellm Let's call the Google AI Studio token counting endpoint ```bash -curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex-ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{ @@ -140,7 +225,7 @@ LiteLLM Proxy Server supports two methods of authentication to Vertex AI: ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' @@ -152,7 +237,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-0 ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/textembedding-gecko@001:predict \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' @@ -162,7 +247,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-geck ### Imagen API ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/imagen-3.0-generate-001:predict \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}' @@ -172,7 +257,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generat ### Count Tokens API ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:countTokens \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' @@ -183,7 +268,7 @@ Create Fine Tuning Job ```shell -curl http://localhost:4000/vertex_ai/tuningJobs \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:tuningJobs \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -243,7 +328,7 @@ Expected Response ```bash -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H 
"Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -268,7 +353,7 @@ tags: ["vertex-js-sdk", "pass-through-endpoint"] ```bash -curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -H "tags: vertex-js-sdk,pass-through-endpoint" \ diff --git a/docs/my-website/docs/providers/predibase.md b/docs/my-website/docs/providers/predibase.md index 31713aef1e..9f25309c19 100644 --- a/docs/my-website/docs/providers/predibase.md +++ b/docs/my-website/docs/providers/predibase.md @@ -230,7 +230,7 @@ response = completion( model="predibase/llama-3-8b-instruct", messages = [{ "content": "Hello, how are you?","role": "user"}], adapter_id="my_repo/3", - adapter_soruce="pbase", + adapter_source="pbase", ) ``` diff --git a/docs/my-website/docs/proxy/image_handling.md b/docs/my-website/docs/proxy/image_handling.md new file mode 100644 index 0000000000..300ab0bc38 --- /dev/null +++ b/docs/my-website/docs/proxy/image_handling.md @@ -0,0 +1,21 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Image URL Handling + + + +Some LLM API's don't support url's for images, but do support base-64 strings. + +For those, LiteLLM will: + +1. Detect a URL being passed +2. Check if the LLM API supports a URL +3. Else, will download the base64 +4. Send the provider a base64 string. + + +LiteLLM also caches this result, in-memory to reduce latency for subsequent calls. + +The limit for an in-memory cache is 1MB. \ No newline at end of file diff --git a/docs/my-website/docs/proxy/release_cycle.md b/docs/my-website/docs/proxy/release_cycle.md index 947a4ae6b3..c5782087f2 100644 --- a/docs/my-website/docs/proxy/release_cycle.md +++ b/docs/my-website/docs/proxy/release_cycle.md @@ -4,9 +4,17 @@ Litellm Proxy has the following release cycle: - `v1.x.x-nightly`: These are releases which pass ci/cd. - `v1.x.x.rc`: These are releases which pass ci/cd + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711). -- `v1.x.x` OR `v1.x.x-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing. +- `v1.x.x:main-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing. -In production, we recommend using the latest `v1.x.x` release. +In production, we recommend using the latest `v1.x.x:main-stable` release. -Follow our release notes [here](https://github.com/BerriAI/litellm/releases). \ No newline at end of file +Follow our release notes [here](https://github.com/BerriAI/litellm/releases). + + +## FAQ + +### Is there a release schedule for LiteLLM stable release? 
+ +Stable releases come out every week (typically Sunday) + diff --git a/docs/my-website/img/image_handling.png b/docs/my-website/img/image_handling.png new file mode 100644 index 0000000000..bd56206911 Binary files /dev/null and b/docs/my-website/img/image_handling.png differ diff --git a/docs/my-website/img/litellm_mcp.png b/docs/my-website/img/litellm_mcp.png new file mode 100644 index 0000000000..cef822eeb2 Binary files /dev/null and b/docs/my-website/img/litellm_mcp.png differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index baae2dfe6d..bff3ad41a2 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -53,7 +53,7 @@ const sidebars = { { type: "category", label: "Architecture", - items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch"], + items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"], }, { type: "link", @@ -293,6 +293,7 @@ const sidebars = { "text_completion", "embedding/supported_embedding", "anthropic_unified", + "mcp", { type: "category", label: "/images", diff --git a/litellm/__init__.py b/litellm/__init__.py index 762a058c7e..25da650440 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -2,7 +2,7 @@ import warnings warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*") -### INIT VARIABLES ######### +### INIT VARIABLES ########## import threading import os from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 9fca969226..5e09fe845f 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -9,9 +9,13 @@ Has 4 methods: """ import json +import sys import time -from typing import List, Optional +from typing import Any, List, Optional +from pydantic import BaseModel + +from ..constants import MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB from .base_cache import BaseCache @@ -22,6 +26,7 @@ class InMemoryCache(BaseCache): default_ttl: Optional[ int ] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute + max_size_per_item: Optional[int] = 1024, # 1MB = 1024KB ): """ max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. 
Use 200 items as a default @@ -30,11 +35,53 @@ class InMemoryCache(BaseCache): max_size_in_memory or 200 ) # set an upper bound of 200 items in-memory self.default_ttl = default_ttl or 600 + self.max_size_per_item = ( + max_size_per_item or MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB + ) # 1MB = 1024KB # in-memory cache self.cache_dict: dict = {} self.ttl_dict: dict = {} + def check_value_size(self, value: Any): + """ + Check if value size exceeds max_size_per_item (1MB) + Returns True if value size is acceptable, False otherwise + """ + try: + # Fast path for common primitive types that are typically small + if ( + isinstance(value, (bool, int, float, str)) + and len(str(value)) < self.max_size_per_item * 512 + ): # Conservative estimate + return True + + # Direct size check for bytes objects + if isinstance(value, bytes): + return sys.getsizeof(value) / 1024 <= self.max_size_per_item + + # Handle special types without full conversion when possible + if hasattr(value, "__sizeof__"): # Use __sizeof__ if available + size = value.__sizeof__() / 1024 + return size <= self.max_size_per_item + + # Fallback for complex types + if isinstance(value, BaseModel) and hasattr( + value, "model_dump" + ): # Pydantic v2 + value = value.model_dump() + elif hasattr(value, "isoformat"): # datetime objects + return True # datetime strings are always small + + # Only convert to JSON if absolutely necessary + if not isinstance(value, (str, bytes)): + value = json.dumps(value, default=str) + + return sys.getsizeof(value) / 1024 <= self.max_size_per_item + + except Exception: + return False + def evict_cache(self): """ Eviction policy: @@ -61,6 +108,8 @@ class InMemoryCache(BaseCache): if len(self.cache_dict) >= self.max_size_in_memory: # only evict when cache is full self.evict_cache() + if not self.check_value_size(value): + return self.cache_dict[key] = value if "ttl" in kwargs and kwargs["ttl"] is not None: diff --git a/litellm/constants.py b/litellm/constants.py index eb59858d43..da66f897c9 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -14,6 +14,7 @@ DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1 DEFAULT_IMAGE_TOKEN_COUNT = 250 DEFAULT_IMAGE_WIDTH = 300 DEFAULT_IMAGE_HEIGHT = 300 +MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic. #### RELIABILITY #### REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives. diff --git a/litellm/experimental_mcp_client/Readme.md b/litellm/experimental_mcp_client/Readme.md new file mode 100644 index 0000000000..4fbd624369 --- /dev/null +++ b/litellm/experimental_mcp_client/Readme.md @@ -0,0 +1,6 @@ +# LiteLLM MCP Client + +LiteLLM MCP Client is a client that allows you to use MCP tools with LiteLLM. 
+ + + diff --git a/litellm/experimental_mcp_client/__init__.py b/litellm/experimental_mcp_client/__init__.py new file mode 100644 index 0000000000..7110d5375e --- /dev/null +++ b/litellm/experimental_mcp_client/__init__.py @@ -0,0 +1,3 @@ +from .tools import call_openai_tool, load_mcp_tools + +__all__ = ["load_mcp_tools", "call_openai_tool"] diff --git a/litellm/experimental_mcp_client/client.py b/litellm/experimental_mcp_client/client.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/litellm/experimental_mcp_client/tools.py b/litellm/experimental_mcp_client/tools.py new file mode 100644 index 0000000000..f4ebbf4af4 --- /dev/null +++ b/litellm/experimental_mcp_client/tools.py @@ -0,0 +1,109 @@ +import json +from typing import List, Literal, Union + +from mcp import ClientSession +from mcp.types import CallToolRequestParams as MCPCallToolRequestParams +from mcp.types import CallToolResult as MCPCallToolResult +from mcp.types import Tool as MCPTool +from openai.types.chat import ChatCompletionToolParam +from openai.types.shared_params.function_definition import FunctionDefinition + +from litellm.types.utils import ChatCompletionMessageToolCall + + +######################################################## +# List MCP Tool functions +######################################################## +def transform_mcp_tool_to_openai_tool(mcp_tool: MCPTool) -> ChatCompletionToolParam: + """Convert an MCP tool to an OpenAI tool.""" + return ChatCompletionToolParam( + type="function", + function=FunctionDefinition( + name=mcp_tool.name, + description=mcp_tool.description or "", + parameters=mcp_tool.inputSchema, + strict=False, + ), + ) + + +async def load_mcp_tools( + session: ClientSession, format: Literal["mcp", "openai"] = "mcp" +) -> Union[List[MCPTool], List[ChatCompletionToolParam]]: + """ + Load all available MCP tools + + Args: + session: The MCP session to use + format: The format to convert the tools to + By default, the tools are returned in MCP format. + + If format is set to "openai", the tools are converted to OpenAI API compatible tools. 
+ """ + tools = await session.list_tools() + if format == "openai": + return [ + transform_mcp_tool_to_openai_tool(mcp_tool=tool) for tool in tools.tools + ] + return tools.tools + + +######################################################## +# Call MCP Tool functions +######################################################## + + +async def call_mcp_tool( + session: ClientSession, + call_tool_request_params: MCPCallToolRequestParams, +) -> MCPCallToolResult: + """Call an MCP tool.""" + tool_result = await session.call_tool( + name=call_tool_request_params.name, + arguments=call_tool_request_params.arguments, + ) + return tool_result + + +def _get_function_arguments(function: FunctionDefinition) -> dict: + """Helper to safely get and parse function arguments.""" + arguments = function.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + arguments = {} + return arguments if isinstance(arguments, dict) else {} + + +def _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool: ChatCompletionMessageToolCall, +) -> MCPCallToolRequestParams: + """Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams.""" + function = openai_tool["function"] + return MCPCallToolRequestParams( + name=function["name"], + arguments=_get_function_arguments(function), + ) + + +async def call_openai_tool( + session: ClientSession, + openai_tool: ChatCompletionMessageToolCall, +) -> MCPCallToolResult: + """ + Call an OpenAI tool using MCP client. + + Args: + session: The MCP session to use + openai_tool: The OpenAI tool to call. You can get this from the `choices[0].message.tool_calls[0]` of the response from the OpenAI API. + Returns: + The result of the MCP tool call. + """ + mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool=openai_tool, + ) + return await call_mcp_tool( + session=session, + call_tool_request_params=mcp_tool_call_request_params, + ) diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py index ebb1032a19..d33af2a477 100644 --- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py +++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py @@ -494,6 +494,7 @@ def convert_to_model_response_object( # noqa: PLR0915 provider_specific_fields=provider_specific_fields, reasoning_content=reasoning_content, thinking_blocks=thinking_blocks, + annotations=choice["message"].get("annotations", None), ) finish_reason = choice.get("finish_reason", None) if finish_reason is None: diff --git a/litellm/litellm_core_utils/model_param_helper.py b/litellm/litellm_core_utils/model_param_helper.py index 09a2c15a77..d792ede282 100644 --- a/litellm/litellm_core_utils/model_param_helper.py +++ b/litellm/litellm_core_utils/model_param_helper.py @@ -1,6 +1,5 @@ from typing import Set -from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.chat.completion_create_params import ( CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming, @@ -13,6 +12,7 @@ from openai.types.completion_create_params import ( ) from openai.types.embedding_create_params import EmbeddingCreateParams +from litellm._logging import verbose_logger from litellm.types.rerank import RerankRequest @@ -84,8 +84,10 @@ class ModelParamHelper: This follows the OpenAI API Spec """ all_chat_completion_kwargs 
= set( - CompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(CompletionCreateParamsStreaming.__annotations__.keys())) + getattr(CompletionCreateParamsNonStreaming, "__annotations__", {}).keys() + ).union( + set(getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys()) + ) return all_chat_completion_kwargs @staticmethod @@ -96,8 +98,16 @@ class ModelParamHelper: This follows the OpenAI API Spec """ all_text_completion_kwargs = set( - TextCompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys())) + getattr( + TextCompletionCreateParamsNonStreaming, "__annotations__", {} + ).keys() + ).union( + set( + getattr( + TextCompletionCreateParamsStreaming, "__annotations__", {} + ).keys() + ) + ) return all_text_completion_kwargs @staticmethod @@ -114,7 +124,7 @@ class ModelParamHelper: This follows the OpenAI API Spec """ - return set(EmbeddingCreateParams.__annotations__.keys()) + return set(getattr(EmbeddingCreateParams, "__annotations__", {}).keys()) @staticmethod def _get_litellm_supported_transcription_kwargs() -> Set[str]: @@ -123,7 +133,19 @@ class ModelParamHelper: This follows the OpenAI API Spec """ - return set(TranscriptionCreateParams.__annotations__.keys()) + try: + from openai.types.audio.transcription_create_params import ( + TranscriptionCreateParamsNonStreaming, + TranscriptionCreateParamsStreaming, + ) + non_streaming_kwargs = set(getattr(TranscriptionCreateParamsNonStreaming, "__annotations__", {}).keys()) + streaming_kwargs = set(getattr(TranscriptionCreateParamsStreaming, "__annotations__", {}).keys()) + + all_transcription_kwargs = non_streaming_kwargs.union(streaming_kwargs) + return all_transcription_kwargs + except Exception as e: + verbose_logger.warning("Error getting transcription kwargs %s", str(e)) + return set() @staticmethod def _get_exclude_kwargs() -> Set[str]: diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 56e64d1859..a11e5af12b 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -799,6 +799,10 @@ class CustomStreamWrapper: "provider_specific_fields" in response_obj and response_obj["provider_specific_fields"] is not None ) + or ( + "annotations" in model_response.choices[0].delta + and model_response.choices[0].delta.annotations is not None + ) ): return True else: @@ -939,7 +943,6 @@ class CustomStreamWrapper: and model_response.choices[0].delta.audio is not None ): return model_response - else: if hasattr(model_response, "usage"): self.chunks.append(model_response) diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 383c1cd3e5..1a77c453f4 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -387,7 +387,7 @@ class AnthropicConfig(BaseConfig): _input_schema["additionalProperties"] = True _input_schema["properties"] = {} else: - _input_schema["properties"] = {"values": json_schema} + _input_schema.update(cast(AnthropicInputSchema, json_schema)) _tool = AnthropicMessagesTool( name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema diff --git a/litellm/llms/base_llm/responses/transformation.py b/litellm/llms/base_llm/responses/transformation.py index c41d63842b..29555c55da 100644 --- a/litellm/llms/base_llm/responses/transformation.py +++ b/litellm/llms/base_llm/responses/transformation.py @@ -7,7 +7,6 
@@ import httpx from litellm.types.llms.openai import ( ResponseInputParam, ResponsesAPIOptionalRequestParams, - ResponsesAPIRequestParams, ResponsesAPIResponse, ResponsesAPIStreamingResponse, ) @@ -97,7 +96,7 @@ class BaseResponsesAPIConfig(ABC): response_api_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> ResponsesAPIRequestParams: + ) -> Dict: pass @abstractmethod @@ -131,3 +130,12 @@ class BaseResponsesAPIConfig(ABC): message=error_message, headers=headers, ) + + def should_fake_stream( + self, + model: Optional[str], + stream: Optional[bool], + custom_llm_provider: Optional[str] = None, + ) -> bool: + """Returns True if litellm should fake a stream for the given model and stream value""" + return False diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 01fe36acda..00caf55207 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.responses.streaming_iterator import ( BaseResponsesAPIStreamingIterator, + MockResponsesAPIStreamingIterator, ResponsesAPIStreamingIterator, SyncResponsesAPIStreamingIterator, ) @@ -978,6 +979,7 @@ class BaseLLMHTTPHandler: timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, + fake_stream: bool = False, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, @@ -1003,6 +1005,7 @@ class BaseLLMHTTPHandler: extra_body=extra_body, timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, + fake_stream=fake_stream, ) if client is None or not isinstance(client, HTTPHandler): @@ -1051,14 +1054,27 @@ class BaseLLMHTTPHandler: try: if stream: # For streaming, use stream=True in the request + if fake_stream is True: + stream, data = self._prepare_fake_stream_request( + stream=stream, + data=data, + fake_stream=fake_stream, + ) response = sync_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout or response_api_optional_request_params.get("timeout"), - stream=True, + stream=stream, ) + if fake_stream is True: + return MockResponsesAPIStreamingIterator( + response=response, + model=model, + logging_obj=logging_obj, + responses_api_provider_config=responses_api_provider_config, + ) return SyncResponsesAPIStreamingIterator( response=response, @@ -1100,6 +1116,7 @@ class BaseLLMHTTPHandler: extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + fake_stream: bool = False, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. 
@@ -1145,22 +1162,36 @@ class BaseLLMHTTPHandler: "headers": headers, }, ) - # Check if streaming is requested stream = response_api_optional_request_params.get("stream", False) try: if stream: # For streaming, we need to use stream=True in the request + if fake_stream is True: + stream, data = self._prepare_fake_stream_request( + stream=stream, + data=data, + fake_stream=fake_stream, + ) + response = await async_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout or response_api_optional_request_params.get("timeout"), - stream=True, + stream=stream, ) + if fake_stream is True: + return MockResponsesAPIStreamingIterator( + response=response, + model=model, + logging_obj=logging_obj, + responses_api_provider_config=responses_api_provider_config, + ) + # Return the streaming iterator return ResponsesAPIStreamingIterator( response=response, @@ -1177,6 +1208,7 @@ class BaseLLMHTTPHandler: timeout=timeout or response_api_optional_request_params.get("timeout"), ) + except Exception as e: raise self._handle_error( e=e, @@ -1189,6 +1221,21 @@ class BaseLLMHTTPHandler: logging_obj=logging_obj, ) + def _prepare_fake_stream_request( + self, + stream: bool, + data: dict, + fake_stream: bool, + ) -> Tuple[bool, dict]: + """ + Handles preparing a request when `fake_stream` is True. + """ + if fake_stream is True: + stream = False + data.pop("stream", None) + return stream, data + return stream, data + def _handle_error( self, e: Exception, diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py index ce4052dc19..e062c0c9fa 100644 --- a/litellm/llms/openai/responses/transformation.py +++ b/litellm/llms/openai/responses/transformation.py @@ -65,10 +65,12 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): response_api_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> ResponsesAPIRequestParams: + ) -> Dict: """No transform applied since inputs are in OpenAI spec already""" - return ResponsesAPIRequestParams( - model=model, input=input, **response_api_optional_request_params + return dict( + ResponsesAPIRequestParams( + model=model, input=input, **response_api_optional_request_params + ) ) def transform_response_api_response( @@ -188,3 +190,27 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): raise ValueError(f"Unknown event type: {event_type}") return model_class + + def should_fake_stream( + self, + model: Optional[str], + stream: Optional[bool], + custom_llm_provider: Optional[str] = None, + ) -> bool: + if stream is not True: + return False + if model is not None: + try: + if ( + litellm.utils.supports_native_streaming( + model=model, + custom_llm_provider=custom_llm_provider, + ) + is False + ): + return True + except Exception as e: + verbose_logger.debug( + f"Error getting model info in OpenAIResponsesAPIConfig: {e}" + ) + return False diff --git a/litellm/llms/sagemaker/chat/handler.py b/litellm/llms/sagemaker/chat/handler.py index 3a90a15093..c827a8a5f7 100644 --- a/litellm/llms/sagemaker/chat/handler.py +++ b/litellm/llms/sagemaker/chat/handler.py @@ -5,6 +5,7 @@ from typing import Callable, Optional, Union import httpx from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.utils import ModelResponse, get_secret from ..common_utils import AWSEventStreamDecoder @@ -125,6 +126,7 @@ class SagemakerChatHandler(BaseAWSLLM): logger_fn=None, acompletion: bool = 
False, headers: dict = {}, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): # pop streaming if it's in the optional params as 'stream' raises an error with sagemaker @@ -173,4 +175,5 @@ class SagemakerChatHandler(BaseAWSLLM): custom_endpoint=True, custom_llm_provider="sagemaker_chat", streaming_decoder=custom_stream_decoder, # type: ignore + client=client, ) diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index f7149c349a..a3f91fbacc 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -1,3 +1,4 @@ +import re from typing import Dict, List, Literal, Optional, Tuple, Union import httpx @@ -280,3 +281,81 @@ def _convert_vertex_datetime_to_openai_datetime(vertex_datetime: str) -> int: dt = datetime.strptime(vertex_datetime, "%Y-%m-%dT%H:%M:%S.%fZ") # Convert to Unix timestamp (seconds since epoch) return int(dt.timestamp()) + + +def get_vertex_project_id_from_url(url: str) -> Optional[str]: + """ + Get the vertex project id from the url + + `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` + """ + match = re.search(r"/projects/([^/]+)", url) + return match.group(1) if match else None + + +def get_vertex_location_from_url(url: str) -> Optional[str]: + """ + Get the vertex location from the url + + `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` + """ + match = re.search(r"/locations/([^/]+)", url) + return match.group(1) if match else None + + +def replace_project_and_location_in_route( + requested_route: str, vertex_project: str, vertex_location: str +) -> str: + """ + Replace project and location values in the route with the provided values + """ + # Replace project and location values while keeping route structure + modified_route = re.sub( + r"/projects/[^/]+/locations/[^/]+/", + f"/projects/{vertex_project}/locations/{vertex_location}/", + requested_route, + ) + return modified_route + + +def construct_target_url( + base_url: str, + requested_route: str, + vertex_location: Optional[str], + vertex_project: Optional[str], +) -> httpx.URL: + """ + Allow user to specify their own project id / location. + + If missing, use defaults + + Handle cachedContent scenario - https://github.com/BerriAI/litellm/issues/5460 + + Constructed Url: + POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents + """ + new_base_url = httpx.URL(base_url) + if "locations" in requested_route: # contains the target project id + location + if vertex_project and vertex_location: + requested_route = replace_project_and_location_in_route( + requested_route, vertex_project, vertex_location + ) + return new_base_url.copy_with(path=requested_route) + + """ + - Add endpoint version (e.g. 
v1beta for cachedContent, v1 for rest) + - Add default project id + - Add default location + """ + vertex_version: Literal["v1", "v1beta1"] = "v1" + if "cachedContent" in requested_route: + vertex_version = "v1beta1" + + base_requested_route = "{}/projects/{}/locations/{}".format( + vertex_version, vertex_project, vertex_location + ) + + updated_requested_route = "/" + base_requested_route + requested_route + + updated_url = new_base_url.copy_with(path=updated_requested_route) + return updated_url diff --git a/litellm/main.py b/litellm/main.py index 6cc1057bb4..1826f2df78 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2604,6 +2604,7 @@ def completion( # type: ignore # noqa: PLR0915 encoding=encoding, logging_obj=logging, acompletion=acompletion, + client=client, ) ## RESPONSE OBJECT diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index f2ca9156ad..1d4353e3ed 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -15,6 +15,12 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.0000, + "search_context_size_medium": 0.0000, + "search_context_size_high": 0.0000 + }, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD" }, "omni-moderation-latest": { @@ -74,7 +80,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview-2025-03-11": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4.5-preview": { "max_tokens": 16384, @@ -199,7 +261,63 @@ 
"supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview-2025-03-11":{ + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -218,7 +336,12 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "search_context_cost_per_query": { + "search_context_size_low": 30.00, + "search_context_size_medium": 35.00, + "search_context_size_high": 50.00 + } }, "o1-pro": { "max_tokens": 100000, @@ -425,7 +548,13 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -1426,6 +1555,25 @@ "supports_vision": false, "supports_prompt_caching": true }, + "azure/gpt-4.5-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-4o": { "max_tokens": 
16384, "max_input_tokens": 128000, @@ -2091,6 +2239,18 @@ "mode": "chat", "supports_tool_choice": true }, + "azure_ai/mistral-small-2503": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true + }, "azure_ai/mistral-large-2407": { "max_tokens": 4096, "max_input_tokens": 128000, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 86172ae269..cd49647464 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -15,4 +15,12 @@ router_settings: redis_password: os.environ/REDIS_PASSWORD redis_port: os.environ/REDIS_PORT - +general_settings: + enable_jwt_auth: True + litellm_jwtauth: + admin_jwt_scope: "ai.admin" + # team_id_jwt_field: "client_id" # 👈 CAN BE ANY FIELD + user_id_jwt_field: "sub" # 👈 CAN BE ANY FIELD + org_id_jwt_field: "org_id" # 👈 CAN BE ANY FIELD + end_user_id_jwt_field: "customer_id" # 👈 CAN BE ANY FIELD + user_id_upsert: True \ No newline at end of file diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 255e37186d..220a0d5ddb 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -1631,7 +1631,7 @@ class LiteLLM_UserTable(LiteLLMPydanticObjectBase): class LiteLLM_UserTableFiltered(BaseModel): # done to avoid exposing sensitive data user_id: str - user_email: str + user_email: Optional[str] = None class LiteLLM_UserTableWithKeyCount(LiteLLM_UserTable): diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index f029511dd2..80cfb03de4 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -14,7 +14,7 @@ import time import traceback from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, cast -from fastapi import status +from fastapi import Request, status from pydantic import BaseModel import litellm @@ -74,6 +74,7 @@ async def common_checks( llm_router: Optional[Router], proxy_logging_obj: ProxyLogging, valid_token: Optional[UserAPIKeyAuth], + request: Request, ) -> bool: """ Common checks across jwt + key-based auth. 
@@ -198,9 +199,134 @@ async def common_checks( user_object=user_object, route=route, request_body=request_body ) + token_team = getattr(valid_token, "team_id", None) + token_type: Literal["ui", "api"] = ( + "ui" if token_team is not None and token_team == "litellm-dashboard" else "api" + ) + _is_route_allowed = _is_allowed_route( + route=route, + token_type=token_type, + user_obj=user_object, + request=request, + request_data=request_body, + valid_token=valid_token, + ) + return True +def _is_ui_route( + route: str, + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Check if the route is a UI used route + """ + # this token is only used for managing the ui + allowed_routes = LiteLLMRoutes.ui_routes.value + # check if the current route startswith any of the allowed routes + if ( + route is not None + and isinstance(route, str) + and any(route.startswith(allowed_route) for allowed_route in allowed_routes) + ): + # Do something if the current route starts with any of the allowed routes + return True + elif any( + RouteChecks._route_matches_pattern(route=route, pattern=allowed_route) + for allowed_route in allowed_routes + ): + return True + return False + + +def _get_user_role( + user_obj: Optional[LiteLLM_UserTable], +) -> Optional[LitellmUserRoles]: + if user_obj is None: + return None + + _user = user_obj + + _user_role = _user.user_role + try: + role = LitellmUserRoles(_user_role) + except ValueError: + return LitellmUserRoles.INTERNAL_USER + + return role + + +def _is_api_route_allowed( + route: str, + request: Request, + request_data: dict, + valid_token: Optional[UserAPIKeyAuth], + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Route b/w api token check and normal token check + """ + _user_role = _get_user_role(user_obj=user_obj) + + if valid_token is None: + raise Exception("Invalid proxy server token passed. valid_token=None.") + + if not _is_user_proxy_admin(user_obj=user_obj): # if non-admin + RouteChecks.non_proxy_admin_allowed_routes_check( + user_obj=user_obj, + _user_role=_user_role, + route=route, + request=request, + request_data=request_data, + valid_token=valid_token, + ) + return True + + +def _is_user_proxy_admin(user_obj: Optional[LiteLLM_UserTable]): + if user_obj is None: + return False + + if ( + user_obj.user_role is not None + and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value + ): + return True + + if ( + user_obj.user_role is not None + and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value + ): + return True + + return False + + +def _is_allowed_route( + route: str, + token_type: Literal["ui", "api"], + request: Request, + request_data: dict, + valid_token: Optional[UserAPIKeyAuth], + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Route b/w ui token check and normal token check + """ + + if token_type == "ui" and _is_ui_route(route=route, user_obj=user_obj): + return True + else: + return _is_api_route_allowed( + route=route, + request=request, + request_data=request_data, + valid_token=valid_token, + user_obj=user_obj, + ) + + def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool: """ Return if a user is allowed to access route. Helper function for `allowed_routes_check`. 
diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index 91fcaf7e11..2c4b122d3a 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -321,6 +321,7 @@ async def check_if_request_size_is_safe(request: Request) -> bool: from litellm.proxy.proxy_server import general_settings, premium_user max_request_size_mb = general_settings.get("max_request_size_mb", None) + if max_request_size_mb is not None: # Check if premium user if premium_user is not True: diff --git a/litellm/proxy/auth/route_checks.py b/litellm/proxy/auth/route_checks.py index a18a7ab5e1..8f956abb72 100644 --- a/litellm/proxy/auth/route_checks.py +++ b/litellm/proxy/auth/route_checks.py @@ -24,7 +24,6 @@ class RouteChecks: route: str, request: Request, valid_token: UserAPIKeyAuth, - api_key: str, request_data: dict, ): """ diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index ace0bf4948..b78619ae65 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -25,7 +25,9 @@ from litellm.litellm_core_utils.dd_tracing import tracer from litellm.proxy._types import * from litellm.proxy.auth.auth_checks import ( _cache_key_object, + _get_user_role, _handle_failed_db_connection_for_get_key_object, + _is_user_proxy_admin, _virtual_key_max_budget_check, _virtual_key_soft_budget_check, can_key_call_model, @@ -48,7 +50,6 @@ from litellm.proxy.auth.auth_utils import ( from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler from litellm.proxy.auth.oauth2_check import check_oauth2_token from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request -from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.auth.service_account_checks import service_account_checks from litellm.proxy.common_utils.http_parsing_utils import _read_request_body from litellm.proxy.utils import PrismaClient, ProxyLogging @@ -98,86 +99,6 @@ def _get_bearer_token( return api_key -def _is_ui_route( - route: str, - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Check if the route is a UI used route - """ - # this token is only used for managing the ui - allowed_routes = LiteLLMRoutes.ui_routes.value - # check if the current route startswith any of the allowed routes - if ( - route is not None - and isinstance(route, str) - and any(route.startswith(allowed_route) for allowed_route in allowed_routes) - ): - # Do something if the current route starts with any of the allowed routes - return True - elif any( - RouteChecks._route_matches_pattern(route=route, pattern=allowed_route) - for allowed_route in allowed_routes - ): - return True - return False - - -def _is_api_route_allowed( - route: str, - request: Request, - request_data: dict, - api_key: str, - valid_token: Optional[UserAPIKeyAuth], - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Route b/w api token check and normal token check - """ - _user_role = _get_user_role(user_obj=user_obj) - - if valid_token is None: - raise Exception("Invalid proxy server token passed. 
valid_token=None.") - - if not _is_user_proxy_admin(user_obj=user_obj): # if non-admin - RouteChecks.non_proxy_admin_allowed_routes_check( - user_obj=user_obj, - _user_role=_user_role, - route=route, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - ) - return True - - -def _is_allowed_route( - route: str, - token_type: Literal["ui", "api"], - request: Request, - request_data: dict, - api_key: str, - valid_token: Optional[UserAPIKeyAuth], - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Route b/w ui token check and normal token check - """ - - if token_type == "ui" and _is_ui_route(route=route, user_obj=user_obj): - return True - else: - return _is_api_route_allowed( - route=route, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - user_obj=user_obj, - ) - - async def user_api_key_auth_websocket(websocket: WebSocket): # Accept the WebSocket connection @@ -328,6 +249,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 parent_otel_span: Optional[Span] = None start_time = datetime.now() route: str = get_request_route(request=request) + valid_token: Optional[UserAPIKeyAuth] = None + try: # get the request body @@ -470,22 +393,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 user_role=LitellmUserRoles.PROXY_ADMIN, parent_otel_span=parent_otel_span, ) - # run through common checks - _ = await common_checks( - request_body=request_data, - team_object=team_object, - user_object=user_object, - end_user_object=end_user_object, - general_settings=general_settings, - global_proxy_spend=global_proxy_spend, - route=route, - llm_router=llm_router, - proxy_logging_obj=proxy_logging_obj, - valid_token=None, - ) - # return UserAPIKeyAuth object - return UserAPIKeyAuth( + valid_token = UserAPIKeyAuth( api_key=None, team_id=team_id, team_tpm_limit=( @@ -501,6 +410,23 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 parent_otel_span=parent_otel_span, end_user_id=end_user_id, ) + # run through common checks + _ = await common_checks( + request=request, + request_body=request_data, + team_object=team_object, + user_object=user_object, + end_user_object=end_user_object, + general_settings=general_settings, + global_proxy_spend=global_proxy_spend, + route=route, + llm_router=llm_router, + proxy_logging_obj=proxy_logging_obj, + valid_token=valid_token, + ) + + # return UserAPIKeyAuth object + return cast(UserAPIKeyAuth, valid_token) #### ELSE #### ## CHECK PASS-THROUGH ENDPOINTS ## @@ -1038,6 +964,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 ) ) _ = await common_checks( + request=request, request_body=request_data, team_object=_team_obj, user_object=user_obj, @@ -1075,23 +1002,6 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. 
These keys can only be used for LiteLLM UI functions # sso/login, ui/login, /key functions and /user functions # this will never be allowed to call /chat/completions - token_team = getattr(valid_token, "team_id", None) - token_type: Literal["ui", "api"] = ( - "ui" - if token_team is not None and token_team == "litellm-dashboard" - else "api" - ) - _is_route_allowed = _is_allowed_route( - route=route, - token_type=token_type, - user_obj=user_obj, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - ) - if not _is_route_allowed: - raise HTTPException(401, detail="Invalid route for UI token") if valid_token is None: # No token was found when looking up in the DB @@ -1242,42 +1152,6 @@ async def _return_user_api_key_auth_obj( return UserAPIKeyAuth(**user_api_key_kwargs) -def _is_user_proxy_admin(user_obj: Optional[LiteLLM_UserTable]): - if user_obj is None: - return False - - if ( - user_obj.user_role is not None - and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value - ): - return True - - if ( - user_obj.user_role is not None - and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value - ): - return True - - return False - - -def _get_user_role( - user_obj: Optional[LiteLLM_UserTable], -) -> Optional[LitellmUserRoles]: - if user_obj is None: - return None - - _user = user_obj - - _user_role = _user.user_role - try: - role = LitellmUserRoles(_user_role) - except ValueError: - return LitellmUserRoles.INTERNAL_USER - - return role - - def get_api_key_from_custom_header( request: Request, custom_litellm_key_header_name: str ) -> str: diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 7f131efb04..fcc13509ce 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -57,7 +57,9 @@ class ProxyBaseLLMRequestProcessing: "x-litellm-call-id": call_id, "x-litellm-model-id": model_id, "x-litellm-cache-key": cache_key, - "x-litellm-model-api-base": api_base, + "x-litellm-model-api-base": ( + api_base.split("?")[0] if api_base else None + ), # don't include query params, risk of leaking sensitive info "x-litellm-version": version, "x-litellm-model-region": model_region, "x-litellm-response-cost": str(response_cost), diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 43d8273dee..e9be169cdc 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -1240,4 +1240,5 @@ async def ui_view_users( return [LiteLLM_UserTableFiltered(**user.model_dump()) for user in users] except Exception as e: + verbose_proxy_logger.exception(f"Error searching users: {str(e)}") raise HTTPException(status_code=500, detail=f"Error searching users: {str(e)}") diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py index 1994e27ecf..f5bcc6ba11 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm/proxy/management_endpoints/team_endpoints.py @@ -470,7 +470,7 @@ async def update_team( if existing_team_row is None: raise HTTPException( - status_code=400, + status_code=404, detail={"error": f"Team not found, passed team_id={data.team_id}"}, ) @@ -1137,14 +1137,16 @@ async def delete_team( team_rows: List[LiteLLM_TeamTable] = [] for team_id in data.team_ids: try: - team_row_base: BaseModel = ( + team_row_base: 
Optional[BaseModel] = ( await prisma_client.db.litellm_teamtable.find_unique( where={"team_id": team_id} ) ) + if team_row_base is None: + raise Exception except Exception: raise HTTPException( - status_code=400, + status_code=404, detail={"error": f"Team not found, passed team_id={team_id}"}, ) team_row_pydantic = LiteLLM_TeamTable(**team_row_base.model_dump()) diff --git a/litellm/proxy/pass_through_endpoints/common_utils.py b/litellm/proxy/pass_through_endpoints/common_utils.py new file mode 100644 index 0000000000..3a3783dd57 --- /dev/null +++ b/litellm/proxy/pass_through_endpoints/common_utils.py @@ -0,0 +1,16 @@ +from fastapi import Request + + +def get_litellm_virtual_key(request: Request) -> str: + """ + Extract and format API key from request headers. + Prioritizes x-litellm-api-key over Authorization header. + + + Vertex JS SDK uses `Authorization` header, we use `x-litellm-api-key` to pass litellm virtual key + + """ + litellm_api_key = request.headers.get("x-litellm-api-key") + if litellm_api_key: + return f"Bearer {litellm_api_key}" + return request.headers.get("Authorization", "") diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 4724c7f9d1..c4d96b67f6 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -12,10 +12,13 @@ import httpx from fastapi import APIRouter, Depends, HTTPException, Request, Response import litellm +from litellm._logging import verbose_proxy_logger from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES +from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.proxy._types import * from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( create_pass_through_route, ) @@ -23,6 +26,7 @@ from litellm.secret_managers.main import get_secret_str from .passthrough_endpoint_router import PassthroughEndpointRouter +vertex_llm_base = VertexBase() router = APIRouter() default_vertex_config = None @@ -417,6 +421,138 @@ async def azure_proxy_route( ) +@router.api_route( + "/vertex-ai/{endpoint:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH"], + tags=["Vertex AI Pass-through", "pass-through"], + include_in_schema=False, +) +@router.api_route( + "/vertex_ai/{endpoint:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH"], + tags=["Vertex AI Pass-through", "pass-through"], +) +async def vertex_proxy_route( + endpoint: str, + request: Request, + fastapi_response: Response, +): + """ + Call LiteLLM proxy via Vertex AI SDK. 
+ + [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai) + """ + from litellm.llms.vertex_ai.common_utils import ( + construct_target_url, + get_vertex_location_from_url, + get_vertex_project_id_from_url, + ) + + encoded_endpoint = httpx.URL(endpoint).path + verbose_proxy_logger.debug("requested endpoint %s", endpoint) + headers: dict = {} + api_key_to_use = get_litellm_virtual_key(request=request) + user_api_key_dict = await user_api_key_auth( + request=request, + api_key=api_key_to_use, + ) + vertex_project: Optional[str] = get_vertex_project_id_from_url(endpoint) + vertex_location: Optional[str] = get_vertex_location_from_url(endpoint) + vertex_credentials = passthrough_endpoint_router.get_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + ) + + headers_passed_through = False + # Use headers from the incoming request if no vertex credentials are found + if vertex_credentials is None or vertex_credentials.vertex_project is None: + headers = dict(request.headers) or {} + headers_passed_through = True + verbose_proxy_logger.debug( + "default_vertex_config not set, incoming request headers %s", headers + ) + base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" + headers.pop("content-length", None) + headers.pop("host", None) + else: + vertex_project = vertex_credentials.vertex_project + vertex_location = vertex_credentials.vertex_location + vertex_credentials_str = vertex_credentials.vertex_credentials + + # Construct base URL for the target endpoint + base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" + + _auth_header, vertex_project = await vertex_llm_base._ensure_access_token_async( + credentials=vertex_credentials_str, + project_id=vertex_project, + custom_llm_provider="vertex_ai_beta", + ) + + auth_header, _ = vertex_llm_base._get_token_and_url( + model="", + auth_header=_auth_header, + gemini_api_key=None, + vertex_credentials=vertex_credentials_str, + vertex_project=vertex_project, + vertex_location=vertex_location, + stream=False, + custom_llm_provider="vertex_ai_beta", + api_base="", + ) + + headers = { + "Authorization": f"Bearer {auth_header}", + } + + request_route = encoded_endpoint + verbose_proxy_logger.debug("request_route %s", request_route) + + # Ensure endpoint starts with '/' for proper URL construction + if not encoded_endpoint.startswith("/"): + encoded_endpoint = "/" + encoded_endpoint + + # Construct the full target URL using httpx + updated_url = construct_target_url( + base_url=base_target_url, + requested_route=encoded_endpoint, + vertex_location=vertex_location, + vertex_project=vertex_project, + ) + + verbose_proxy_logger.debug("updated url %s", updated_url) + + ## check for streaming + target = str(updated_url) + is_streaming_request = False + if "stream" in str(updated_url): + is_streaming_request = True + target += "?alt=sse" + + ## CREATE PASS-THROUGH + endpoint_func = create_pass_through_route( + endpoint=endpoint, + target=target, + custom_headers=headers, + ) # dynamically construct pass-through endpoint based on incoming path + + try: + received_value = await endpoint_func( + request, + fastapi_response, + user_api_key_dict, + stream=is_streaming_request, # type: ignore + ) + except Exception as e: + if headers_passed_through: + raise Exception( + f"No credentials found on proxy for this request. 
Headers were passed through directly but request failed with error: {str(e)}" + ) + else: + raise e + + return received_value + + @router.api_route( "/openai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"], diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index b13d614678..a13b0dc216 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -1,6 +1,7 @@ import ast import asyncio import json +import uuid from base64 import b64encode from datetime import datetime from typing import Dict, List, Optional, Union @@ -284,7 +285,9 @@ class HttpPassThroughEndpointHelpers: @staticmethod def get_response_headers( - headers: httpx.Headers, litellm_call_id: Optional[str] = None + headers: httpx.Headers, + litellm_call_id: Optional[str] = None, + custom_headers: Optional[dict] = None, ) -> dict: excluded_headers = {"transfer-encoding", "content-encoding"} @@ -295,6 +298,8 @@ class HttpPassThroughEndpointHelpers: } if litellm_call_id: return_headers["x-litellm-call-id"] = litellm_call_id + if custom_headers: + return_headers.update(custom_headers) return return_headers @@ -365,8 +370,9 @@ async def pass_through_request( # noqa: PLR0915 query_params: Optional[dict] = None, stream: Optional[bool] = None, ): + litellm_call_id = str(uuid.uuid4()) + url: Optional[httpx.URL] = None try: - import uuid from litellm.litellm_core_utils.litellm_logging import Logging from litellm.proxy.proxy_server import proxy_logging_obj @@ -416,8 +422,6 @@ async def pass_through_request( # noqa: PLR0915 ) async_client = async_client_obj.client - litellm_call_id = str(uuid.uuid4()) - # create logging object start_time = datetime.now() logging_obj = Logging( @@ -596,15 +600,31 @@ async def pass_through_request( # noqa: PLR0915 ) ) + ## CUSTOM HEADERS - `x-litellm-*` + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=user_api_key_dict, + call_id=litellm_call_id, + model_id=None, + cache_key=None, + api_base=str(url._uri_reference), + ) + return Response( content=content, status_code=response.status_code, headers=HttpPassThroughEndpointHelpers.get_response_headers( headers=response.headers, - litellm_call_id=litellm_call_id, + custom_headers=custom_headers, ), ) except Exception as e: + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=user_api_key_dict, + call_id=litellm_call_id, + model_id=None, + cache_key=None, + api_base=str(url._uri_reference) if url else None, + ) verbose_proxy_logger.exception( "litellm.proxy.proxy_server.pass_through_endpoint(): Exception occured - {}".format( str(e) @@ -616,6 +636,7 @@ async def pass_through_request( # noqa: PLR0915 type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + headers=custom_headers, ) else: error_msg = f"{str(e)}" @@ -624,6 +645,7 @@ async def pass_through_request( # noqa: PLR0915 type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), + headers=custom_headers, ) diff --git a/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py b/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py index adf7d0f30c..89cccfc071 100644 --- a/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py +++ 
b/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py @@ -1,7 +1,9 @@ from typing import Dict, Optional -from litellm._logging import verbose_logger +from litellm._logging import verbose_router_logger from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials class PassthroughEndpointRouter: @@ -11,6 +13,10 @@ class PassthroughEndpointRouter: def __init__(self): self.credentials: Dict[str, str] = {} + self.deployment_key_to_vertex_credentials: Dict[ + str, VertexPassThroughCredentials + ] = {} + self.default_vertex_config: Optional[VertexPassThroughCredentials] = None def set_pass_through_credentials( self, @@ -45,14 +51,14 @@ class PassthroughEndpointRouter: custom_llm_provider=custom_llm_provider, region_name=region_name, ) - verbose_logger.debug( + verbose_router_logger.debug( f"Pass-through llm endpoints router, looking for credentials for {credential_name}" ) if credential_name in self.credentials: - verbose_logger.debug(f"Found credentials for {credential_name}") + verbose_router_logger.debug(f"Found credentials for {credential_name}") return self.credentials[credential_name] else: - verbose_logger.debug( + verbose_router_logger.debug( f"No credentials found for {credential_name}, looking for env variable" ) _env_variable_name = ( @@ -62,6 +68,100 @@ class PassthroughEndpointRouter: ) return get_secret_str(_env_variable_name) + def _get_vertex_env_vars(self) -> VertexPassThroughCredentials: + """ + Helper to get vertex pass through config from environment variables + + The following environment variables are used: + - DEFAULT_VERTEXAI_PROJECT (project id) + - DEFAULT_VERTEXAI_LOCATION (location) + - DEFAULT_GOOGLE_APPLICATION_CREDENTIALS (path to credentials file) + """ + return VertexPassThroughCredentials( + vertex_project=get_secret_str("DEFAULT_VERTEXAI_PROJECT"), + vertex_location=get_secret_str("DEFAULT_VERTEXAI_LOCATION"), + vertex_credentials=get_secret_str("DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"), + ) + + def set_default_vertex_config(self, config: Optional[dict] = None): + """Sets vertex configuration from provided config and/or environment variables + + Args: + config (Optional[dict]): Configuration dictionary + Example: { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "os.environ/GOOGLE_CREDS" + } + """ + # Initialize config dictionary if None + if config is None: + self.default_vertex_config = self._get_vertex_env_vars() + return + + if isinstance(config, dict): + for key, value in config.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + config[key] = get_secret_str(value) + + self.default_vertex_config = VertexPassThroughCredentials(**config) + + def add_vertex_credentials( + self, + project_id: str, + location: str, + vertex_credentials: VERTEX_CREDENTIALS_TYPES, + ): + """ + Add the vertex credentials for the given project-id, location + """ + + deployment_key = self._get_deployment_key( + project_id=project_id, + location=location, + ) + if deployment_key is None: + verbose_router_logger.debug( + "No deployment key found for project-id, location" + ) + return + vertex_pass_through_credentials = VertexPassThroughCredentials( + vertex_project=project_id, + vertex_location=location, + vertex_credentials=vertex_credentials, + ) + self.deployment_key_to_vertex_credentials[deployment_key] = ( + vertex_pass_through_credentials + ) + 
+ def _get_deployment_key( + self, project_id: Optional[str], location: Optional[str] + ) -> Optional[str]: + """ + Get the deployment key for the given project-id, location + """ + if project_id is None or location is None: + return None + return f"{project_id}-{location}" + + def get_vertex_credentials( + self, project_id: Optional[str], location: Optional[str] + ) -> Optional[VertexPassThroughCredentials]: + """ + Get the vertex credentials for the given project-id, location + """ + deployment_key = self._get_deployment_key( + project_id=project_id, + location=location, + ) + + if deployment_key is None: + return self.default_vertex_config + if deployment_key in self.deployment_key_to_vertex_credentials: + return self.deployment_key_to_vertex_credentials[deployment_key] + else: + return self.default_vertex_config + def _get_credential_name_for_provider( self, custom_llm_provider: str, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 9185b2d22a..e7cc131fa9 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -239,6 +239,9 @@ from litellm.proxy.openai_files_endpoints.files_endpoints import ( router as openai_files_router, ) from litellm.proxy.openai_files_endpoints.files_endpoints import set_files_config +from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, +) from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) @@ -276,8 +279,6 @@ from litellm.proxy.utils import ( from litellm.proxy.vertex_ai_endpoints.langfuse_endpoints import ( router as langfuse_router, ) -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config from litellm.router import ( AssistantsTypedDict, Deployment, @@ -2119,7 +2120,9 @@ class ProxyConfig: ## default config for vertex ai routes default_vertex_config = config.get("default_vertex_config", None) - set_default_vertex_config(config=default_vertex_config) + passthrough_endpoint_router.set_default_vertex_config( + config=default_vertex_config + ) ## ROUTER SETTINGS (e.g. routing_strategy, ...) 
router_settings = config.get("router_settings", None) @@ -8170,7 +8173,6 @@ app.include_router(batches_router) app.include_router(rerank_router) app.include_router(fine_tuning_router) app.include_router(credential_router) -app.include_router(vertex_router) app.include_router(llm_passthrough_router) app.include_router(mcp_router) app.include_router(anthropic_router) diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py deleted file mode 100644 index 7444e3d1c1..0000000000 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ /dev/null @@ -1,274 +0,0 @@ -import traceback -from typing import Optional - -import httpx -from fastapi import APIRouter, HTTPException, Request, Response, status - -import litellm -from litellm._logging import verbose_proxy_logger -from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( - create_pass_through_route, -) -from litellm.secret_managers.main import get_secret_str -from litellm.types.passthrough_endpoints.vertex_ai import * - -from .vertex_passthrough_router import VertexPassThroughRouter - -router = APIRouter() -vertex_pass_through_router = VertexPassThroughRouter() - -default_vertex_config: VertexPassThroughCredentials = VertexPassThroughCredentials() - - -def _get_vertex_env_vars() -> VertexPassThroughCredentials: - """ - Helper to get vertex pass through config from environment variables - - The following environment variables are used: - - DEFAULT_VERTEXAI_PROJECT (project id) - - DEFAULT_VERTEXAI_LOCATION (location) - - DEFAULT_GOOGLE_APPLICATION_CREDENTIALS (path to credentials file) - """ - return VertexPassThroughCredentials( - vertex_project=get_secret_str("DEFAULT_VERTEXAI_PROJECT"), - vertex_location=get_secret_str("DEFAULT_VERTEXAI_LOCATION"), - vertex_credentials=get_secret_str("DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"), - ) - - -def set_default_vertex_config(config: Optional[dict] = None): - """Sets vertex configuration from provided config and/or environment variables - - Args: - config (Optional[dict]): Configuration dictionary - Example: { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "os.environ/GOOGLE_CREDS" - } - """ - global default_vertex_config - - # Initialize config dictionary if None - if config is None: - default_vertex_config = _get_vertex_env_vars() - return - - if isinstance(config, dict): - for key, value in config.items(): - if isinstance(value, str) and value.startswith("os.environ/"): - config[key] = litellm.get_secret(value) - - _set_default_vertex_config(VertexPassThroughCredentials(**config)) - - -def _set_default_vertex_config( - vertex_pass_through_credentials: VertexPassThroughCredentials, -): - global default_vertex_config - default_vertex_config = vertex_pass_through_credentials - - -def exception_handler(e: Exception): - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - return ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_msg = f"{str(e)}" - return 
ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) - - -def construct_target_url( - base_url: str, - requested_route: str, - default_vertex_location: Optional[str], - default_vertex_project: Optional[str], -) -> httpx.URL: - """ - Allow user to specify their own project id / location. - - If missing, use defaults - - Handle cachedContent scenario - https://github.com/BerriAI/litellm/issues/5460 - - Constructed Url: - POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents - """ - new_base_url = httpx.URL(base_url) - if "locations" in requested_route: # contains the target project id + location - updated_url = new_base_url.copy_with(path=requested_route) - return updated_url - """ - - Add endpoint version (e.g. v1beta for cachedContent, v1 for rest) - - Add default project id - - Add default location - """ - vertex_version: Literal["v1", "v1beta1"] = "v1" - if "cachedContent" in requested_route: - vertex_version = "v1beta1" - - base_requested_route = "{}/projects/{}/locations/{}".format( - vertex_version, default_vertex_project, default_vertex_location - ) - - updated_requested_route = "/" + base_requested_route + requested_route - - updated_url = new_base_url.copy_with(path=updated_requested_route) - return updated_url - - -@router.api_route( - "/vertex-ai/{endpoint:path}", - methods=["GET", "POST", "PUT", "DELETE", "PATCH"], - tags=["Vertex AI Pass-through", "pass-through"], - include_in_schema=False, -) -@router.api_route( - "/vertex_ai/{endpoint:path}", - methods=["GET", "POST", "PUT", "DELETE", "PATCH"], - tags=["Vertex AI Pass-through", "pass-through"], -) -async def vertex_proxy_route( - endpoint: str, - request: Request, - fastapi_response: Response, -): - """ - Call LiteLLM proxy via Vertex AI SDK. 
- - [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai) - """ - encoded_endpoint = httpx.URL(endpoint).path - verbose_proxy_logger.debug("requested endpoint %s", endpoint) - headers: dict = {} - api_key_to_use = get_litellm_virtual_key(request=request) - user_api_key_dict = await user_api_key_auth( - request=request, - api_key=api_key_to_use, - ) - - vertex_project: Optional[str] = ( - VertexPassThroughRouter._get_vertex_project_id_from_url(endpoint) - ) - vertex_location: Optional[str] = ( - VertexPassThroughRouter._get_vertex_location_from_url(endpoint) - ) - vertex_credentials = vertex_pass_through_router.get_vertex_credentials( - project_id=vertex_project, - location=vertex_location, - ) - - # Use headers from the incoming request if no vertex credentials are found - if vertex_credentials.vertex_project is None: - headers = dict(request.headers) or {} - verbose_proxy_logger.debug( - "default_vertex_config not set, incoming request headers %s", headers - ) - base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" - headers.pop("content-length", None) - headers.pop("host", None) - else: - vertex_project = vertex_credentials.vertex_project - vertex_location = vertex_credentials.vertex_location - vertex_credentials_str = vertex_credentials.vertex_credentials - - # Construct base URL for the target endpoint - base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" - - _auth_header, vertex_project = ( - await vertex_fine_tuning_apis_instance._ensure_access_token_async( - credentials=vertex_credentials_str, - project_id=vertex_project, - custom_llm_provider="vertex_ai_beta", - ) - ) - - auth_header, _ = vertex_fine_tuning_apis_instance._get_token_and_url( - model="", - auth_header=_auth_header, - gemini_api_key=None, - vertex_credentials=vertex_credentials_str, - vertex_project=vertex_project, - vertex_location=vertex_location, - stream=False, - custom_llm_provider="vertex_ai_beta", - api_base="", - ) - - headers = { - "Authorization": f"Bearer {auth_header}", - } - - request_route = encoded_endpoint - verbose_proxy_logger.debug("request_route %s", request_route) - - # Ensure endpoint starts with '/' for proper URL construction - if not encoded_endpoint.startswith("/"): - encoded_endpoint = "/" + encoded_endpoint - - # Construct the full target URL using httpx - updated_url = construct_target_url( - base_url=base_target_url, - requested_route=encoded_endpoint, - default_vertex_location=vertex_location, - default_vertex_project=vertex_project, - ) - # base_url = httpx.URL(base_target_url) - # updated_url = base_url.copy_with(path=encoded_endpoint) - - verbose_proxy_logger.debug("updated url %s", updated_url) - - ## check for streaming - target = str(updated_url) - is_streaming_request = False - if "stream" in str(updated_url): - is_streaming_request = True - target += "?alt=sse" - - ## CREATE PASS-THROUGH - endpoint_func = create_pass_through_route( - endpoint=endpoint, - target=target, - custom_headers=headers, - ) # dynamically construct pass-through endpoint based on incoming path - received_value = await endpoint_func( - request, - fastapi_response, - user_api_key_dict, - stream=is_streaming_request, # type: ignore - ) - - return received_value - - -def get_litellm_virtual_key(request: Request) -> str: - """ - Extract and format API key from request headers. - Prioritizes x-litellm-api-key over Authorization header. 
- - - Vertex JS SDK uses `Authorization` header, we use `x-litellm-api-key` to pass litellm virtual key - - """ - litellm_api_key = request.headers.get("x-litellm-api-key") - if litellm_api_key: - return f"Bearer {litellm_api_key}" - return request.headers.get("Authorization", "") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py b/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py deleted file mode 100644 index 0273a62047..0000000000 --- a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py +++ /dev/null @@ -1,121 +0,0 @@ -import json -import re -from typing import Dict, Optional - -from litellm._logging import verbose_proxy_logger -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - VertexPassThroughCredentials, -) -from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES - - -class VertexPassThroughRouter: - """ - Vertex Pass Through Router for Vertex AI pass-through endpoints - - - - if request specifies a project-id, location -> use credentials corresponding to the project-id, location - - if request does not specify a project-id, location -> use credentials corresponding to the DEFAULT_VERTEXAI_PROJECT, DEFAULT_VERTEXAI_LOCATION - """ - - def __init__(self): - """ - Initialize the VertexPassThroughRouter - Stores the vertex credentials for each deployment key - ``` - { - "project_id-location": VertexPassThroughCredentials, - "adroit-crow-us-central1": VertexPassThroughCredentials, - } - ``` - """ - self.deployment_key_to_vertex_credentials: Dict[ - str, VertexPassThroughCredentials - ] = {} - pass - - def get_vertex_credentials( - self, project_id: Optional[str], location: Optional[str] - ) -> VertexPassThroughCredentials: - """ - Get the vertex credentials for the given project-id, location - """ - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - deployment_key = self._get_deployment_key( - project_id=project_id, - location=location, - ) - if deployment_key is None: - return default_vertex_config - if deployment_key in self.deployment_key_to_vertex_credentials: - return self.deployment_key_to_vertex_credentials[deployment_key] - else: - return default_vertex_config - - def add_vertex_credentials( - self, - project_id: str, - location: str, - vertex_credentials: VERTEX_CREDENTIALS_TYPES, - ): - """ - Add the vertex credentials for the given project-id, location - """ - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - _set_default_vertex_config, - ) - - deployment_key = self._get_deployment_key( - project_id=project_id, - location=location, - ) - if deployment_key is None: - verbose_proxy_logger.debug( - "No deployment key found for project-id, location" - ) - return - vertex_pass_through_credentials = VertexPassThroughCredentials( - vertex_project=project_id, - vertex_location=location, - vertex_credentials=vertex_credentials, - ) - self.deployment_key_to_vertex_credentials[deployment_key] = ( - vertex_pass_through_credentials - ) - verbose_proxy_logger.debug( - f"self.deployment_key_to_vertex_credentials: {json.dumps(self.deployment_key_to_vertex_credentials, indent=4, default=str)}" - ) - _set_default_vertex_config(vertex_pass_through_credentials) - - def _get_deployment_key( - self, project_id: Optional[str], location: Optional[str] - ) -> Optional[str]: - """ - Get the deployment key for the given project-id, location - """ - if project_id is None or location is None: - return None - return f"{project_id}-{location}" - - @staticmethod - def 
_get_vertex_project_id_from_url(url: str) -> Optional[str]: - """ - Get the vertex project id from the url - - `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` - """ - match = re.search(r"/projects/([^/]+)", url) - return match.group(1) if match else None - - @staticmethod - def _get_vertex_location_from_url(url: str) -> Optional[str]: - """ - Get the vertex location from the url - - `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` - """ - match = re.search(r"/locations/([^/]+)", url) - return match.group(1) if match else None diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 43f37bdbc6..aec2f8fe4a 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -232,6 +232,9 @@ def responses( timeout=timeout or request_timeout, _is_async=_is_async, client=kwargs.get("client"), + fake_stream=responses_api_provider_config.should_fake_stream( + model=model, stream=stream, custom_llm_provider=custom_llm_provider + ), ) return response diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py index c016e71e7e..3039efb9f7 100644 --- a/litellm/responses/streaming_iterator.py +++ b/litellm/responses/streaming_iterator.py @@ -11,6 +11,7 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig from litellm.types.llms.openai import ( + ResponseCompletedEvent, ResponsesAPIStreamEvents, ResponsesAPIStreamingResponse, ) @@ -207,3 +208,63 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): start_time=self.start_time, end_time=datetime.now(), ) + + +class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): + """ + mock iterator - some models like o1-pro do not support streaming, we need to fake a stream + """ + + def __init__( + self, + response: httpx.Response, + model: str, + responses_api_provider_config: BaseResponsesAPIConfig, + logging_obj: LiteLLMLoggingObj, + ): + self.raw_http_response = response + super().__init__( + response=response, + model=model, + responses_api_provider_config=responses_api_provider_config, + logging_obj=logging_obj, + ) + self.is_done = False + + def __aiter__(self): + return self + + async def __anext__(self) -> ResponsesAPIStreamingResponse: + if self.is_done: + raise StopAsyncIteration + self.is_done = True + transformed_response = ( + self.responses_api_provider_config.transform_response_api_response( + model=self.model, + raw_response=self.raw_http_response, + logging_obj=self.logging_obj, + ) + ) + return ResponseCompletedEvent( + type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED, + response=transformed_response, + ) + + def __iter__(self): + return self + + def __next__(self) -> ResponsesAPIStreamingResponse: + if self.is_done: + raise StopIteration + self.is_done = True + transformed_response = ( + self.responses_api_provider_config.transform_response_api_response( + model=self.model, + raw_response=self.raw_http_response, + logging_obj=self.logging_obj, + ) + ) + return ResponseCompletedEvent( + type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED, + response=transformed_response, + ) diff --git a/litellm/router.py b/litellm/router.py index a395c851dd..af7b00e79d 100644 --- 
a/litellm/router.py +++ b/litellm/router.py @@ -4495,11 +4495,11 @@ class Router: Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints """ if deployment.litellm_params.use_in_pass_through is True: - if custom_llm_provider == "vertex_ai": - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, - ) + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, + ) + if custom_llm_provider == "vertex_ai": if ( deployment.litellm_params.vertex_project is None or deployment.litellm_params.vertex_location is None @@ -4508,16 +4508,12 @@ class Router: raise ValueError( "vertex_project, vertex_location, and vertex_credentials must be set in litellm_params for pass-through endpoints" ) - vertex_pass_through_router.add_vertex_credentials( + passthrough_endpoint_router.add_vertex_credentials( project_id=deployment.litellm_params.vertex_project, location=deployment.litellm_params.vertex_location, vertex_credentials=deployment.litellm_params.vertex_credentials, ) else: - from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( - passthrough_endpoint_router, - ) - passthrough_endpoint_router.set_pass_through_credentials( custom_llm_provider=custom_llm_provider, api_base=deployment.litellm_params.api_base, diff --git a/litellm/router_utils/handle_error.py b/litellm/router_utils/handle_error.py index e1055a9d0f..132440cbc3 100644 --- a/litellm/router_utils/handle_error.py +++ b/litellm/router_utils/handle_error.py @@ -1,7 +1,9 @@ from typing import TYPE_CHECKING, Any, Optional from litellm._logging import verbose_router_logger -from litellm.router_utils.cooldown_handlers import _async_get_cooldown_deployments +from litellm.router_utils.cooldown_handlers import ( + _async_get_cooldown_deployments_with_debug_info, +) from litellm.types.integrations.slack_alerting import AlertType from litellm.types.router import RouterRateLimitError @@ -75,7 +77,7 @@ async def async_raise_no_deployment_exception( _cooldown_time = litellm_router_instance.cooldown_cache.get_min_cooldown( model_ids=model_ids, parent_otel_span=parent_otel_span ) - _cooldown_list = await _async_get_cooldown_deployments( + _cooldown_list = await _async_get_cooldown_deployments_with_debug_info( litellm_router_instance=litellm_router_instance, parent_otel_span=parent_otel_span, ) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 4b0be9d5fe..e58f573227 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -382,6 +382,28 @@ class ChatCompletionThinkingBlock(TypedDict, total=False): cache_control: Optional[Union[dict, ChatCompletionCachedContent]] +class ChatCompletionAnnotationURLCitation(TypedDict, total=False): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + url: str + """The URL of the web resource.""" + + +class ChatCompletionAnnotation(TypedDict, total=False): + type: Literal["url_citation"] + """The type of the URL citation. 
Always `url_citation`.""" + + url_citation: ChatCompletionAnnotationURLCitation + """A URL citation when using web search.""" + + class OpenAIChatCompletionTextObject(TypedDict): type: Literal["text"] text: str diff --git a/litellm/types/utils.py b/litellm/types/utils.py index a665428561..8821d2c80b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from aiohttp import FormData from openai._models import BaseModel as OpenAIObject from openai.types.audio.transcription_create_params import FileTypes # type: ignore +from openai.types.chat.chat_completion import ChatCompletion from openai.types.completion_usage import ( CompletionTokensDetails, CompletionUsage, @@ -27,6 +28,7 @@ from ..litellm_core_utils.core_helpers import map_finish_reason from .guardrails import GuardrailEventHooks from .llms.openai import ( Batch, + ChatCompletionAnnotation, ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, ChatCompletionUsageBlock, @@ -527,6 +529,7 @@ class Message(OpenAIObject): provider_specific_fields: Optional[Dict[str, Any]] = Field( default=None, exclude=True ) + annotations: Optional[List[ChatCompletionAnnotation]] = None def __init__( self, @@ -538,6 +541,7 @@ class Message(OpenAIObject): provider_specific_fields: Optional[Dict[str, Any]] = None, reasoning_content: Optional[str] = None, thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + annotations: Optional[List[ChatCompletionAnnotation]] = None, **params, ): init_values: Dict[str, Any] = { @@ -566,6 +570,9 @@ class Message(OpenAIObject): if thinking_blocks is not None: init_values["thinking_blocks"] = thinking_blocks + if annotations is not None: + init_values["annotations"] = annotations + if reasoning_content is not None: init_values["reasoning_content"] = reasoning_content @@ -623,6 +630,7 @@ class Delta(OpenAIObject): audio: Optional[ChatCompletionAudioResponse] = None, reasoning_content: Optional[str] = None, thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + annotations: Optional[List[ChatCompletionAnnotation]] = None, **params, ): super(Delta, self).__init__(**params) @@ -633,6 +641,7 @@ class Delta(OpenAIObject): self.function_call: Optional[Union[FunctionCall, Any]] = None self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None self.audio: Optional[ChatCompletionAudioResponse] = None + self.annotations: Optional[List[ChatCompletionAnnotation]] = None if reasoning_content is not None: self.reasoning_content = reasoning_content @@ -646,6 +655,12 @@ class Delta(OpenAIObject): # ensure default response matches OpenAI spec del self.thinking_blocks + # Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec) + if annotations is not None: + self.annotations = annotations + else: + del self.annotations + if function_call is not None and isinstance(function_call, dict): self.function_call = FunctionCall(**function_call) else: diff --git a/litellm/utils.py b/litellm/utils.py index 677cfe7684..03e69acf4e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1975,6 +1975,60 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) -> ) +def supports_web_search(model: str, custom_llm_provider: Optional[str]) -> bool: + """ + Check if the given model supports web search and return a boolean value. + + Parameters: + model (str): The model name to be checked. + custom_llm_provider (str): The provider to be checked. 
+ + Returns: + bool: True if the model supports web search, False otherwise. + + Raises: + Exception: If the given model is not found in model_prices_and_context_window.json. + """ + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_web_search", + ) + + +def supports_native_streaming(model: str, custom_llm_provider: Optional[str]) -> bool: + """ + Check if the given model supports native streaming and return a boolean value. + + Parameters: + model (str): The model name to be checked. + custom_llm_provider (str): The provider to be checked. + + Returns: + bool: True if the model supports native streaming, False otherwise. + + Raises: + Exception: If the given model is not found in model_prices_and_context_window.json. + """ + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + model_info = _get_model_info_helper( + model=model, custom_llm_provider=custom_llm_provider + ) + supports_native_streaming = model_info.get("supports_native_streaming", True) + if supports_native_streaming is None: + supports_native_streaming = True + return supports_native_streaming + except Exception as e: + verbose_logger.debug( + f"Model not found or error in checking supports_native_streaming support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" + ) + return False + + def supports_response_schema( model: str, custom_llm_provider: Optional[str] = None ) -> bool: diff --git a/mcp_servers.json b/mcp_servers.json new file mode 100644 index 0000000000..c196815747 --- /dev/null +++ b/mcp_servers.json @@ -0,0 +1,16 @@ +{ + "brave-search": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "BRAVE_API_KEY", + "mcp/brave-search" + ], + "env": { + "BRAVE_API_KEY": "YOUR_API_KEY_HERE" + } + } +} \ No newline at end of file diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f2ca9156ad..1d4353e3ed 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -15,6 +15,12 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.0000, + "search_context_size_medium": 0.0000, + "search_context_size_high": 0.0000 + }, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD" }, "omni-moderation-latest": { @@ -74,7 +80,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview-2025-03-11": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + 
"supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4.5-preview": { "max_tokens": 16384, @@ -199,7 +261,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview-2025-03-11":{ + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -218,7 +336,12 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "search_context_cost_per_query": { + "search_context_size_low": 30.00, + "search_context_size_medium": 35.00, + "search_context_size_high": 50.00 + } }, "o1-pro": { "max_tokens": 100000, @@ -425,7 +548,13 @@ "supports_vision": true, "supports_prompt_caching": true, 
"supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -1426,6 +1555,25 @@ "supports_vision": false, "supports_prompt_caching": true }, + "azure/gpt-4.5-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-4o": { "max_tokens": 16384, "max_input_tokens": 128000, @@ -2091,6 +2239,18 @@ "mode": "chat", "supports_tool_choice": true }, + "azure_ai/mistral-small-2503": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true + }, "azure_ai/mistral-large-2407": { "max_tokens": 4096, "max_input_tokens": 128000, diff --git a/poetry.lock b/poetry.lock index d270aa2d79..5834dd2e70 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -6,6 +6,7 @@ version = "2.4.4" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, @@ -17,6 +18,7 @@ version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, @@ -129,6 +131,7 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -143,6 +146,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -157,6 +161,7 @@ version = "4.5.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"}, {file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"}, @@ -179,6 +184,8 @@ version = "3.11.0" description = "In-process task scheduler with Cron-like capabilities" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da"}, {file = "apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133"}, @@ -207,6 +214,8 @@ version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_full_version < \"3.11.3\" and extra == \"proxy\" or python_version < \"3.11\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -218,6 +227,7 @@ version = "25.3.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = 
"sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, @@ -237,6 +247,8 @@ version = "1.32.0" description = "Microsoft Azure Core Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_core-1.32.0-py3-none-any.whl", hash = "sha256:eac191a0efb23bfa83fddf321b27b122b4ec847befa3091fa736a5c32c50d7b4"}, {file = "azure_core-1.32.0.tar.gz", hash = "sha256:22b3c35d6b2dae14990f6c1be2912bf23ffe50b220e708a28ab1bb92b1c730e5"}, @@ -256,6 +268,8 @@ version = "1.21.0" description = "Microsoft Azure Identity Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9"}, {file = "azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6"}, @@ -274,6 +288,8 @@ version = "4.9.0" description = "Microsoft Azure Key Vault Secrets Client Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_keyvault_secrets-4.9.0-py3-none-any.whl", hash = "sha256:33c7e2aca2cc2092cebc8c6e96eca36a5cc30c767e16ea429c5fa21270e9fba6"}, {file = "azure_keyvault_secrets-4.9.0.tar.gz", hash = "sha256:2a03bb2ffd9a0d6c8ad1c330d9d0310113985a9de06607ece378fd72a5889fe1"}, @@ -290,6 +306,8 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = true python-versions = ">=3.7,<4.0" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -301,6 +319,8 @@ version = "0.2.1" description = "Backport of the standard library zoneinfo module" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\" and python_version < \"3.9\"" files = [ {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, @@ -329,6 +349,7 @@ version = "23.12.1" description = "The uncompromising code formatter." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, @@ -375,6 +396,8 @@ version = "1.34.34" description = "The AWS SDK for Python" optional = true python-versions = ">= 3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "boto3-1.34.34-py3-none-any.whl", hash = "sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa"}, {file = "boto3-1.34.34.tar.gz", hash = "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1"}, @@ -394,6 +417,8 @@ version = "1.34.162" description = "Low-level, data-driven core of boto 3." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "botocore-1.34.162-py3-none-any.whl", hash = "sha256:2d918b02db88d27a75b48275e6fb2506e9adaaddbec1ffa6a8a0898b34e769be"}, {file = "botocore-1.34.162.tar.gz", hash = "sha256:adc23be4fb99ad31961236342b7cbf3c0bfc62532cd02852196032e8c0d682f3"}, @@ -416,6 +441,8 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -427,6 +454,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -438,6 +466,8 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -517,6 +547,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -618,6 +649,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -632,10 +664,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "cryptography" @@ -643,6 +677,8 @@ version = "43.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\"" files = [ {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, @@ -692,6 +728,7 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -703,6 +740,8 @@ version = "2.6.1" description = "DNS toolkit" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, @@ -723,6 +762,8 @@ version = "2.2.0" description = "A robust email address syntax and deliverability validation library." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"}, {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, @@ -738,6 +779,8 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -752,6 +795,8 @@ version = "0.115.11" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "fastapi-0.115.11-py3-none-any.whl", hash = "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64"}, {file = "fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"}, @@ -772,6 +817,8 @@ version = "0.16.0" description = "FastAPI plugin to enable SSO to most common providers (such as Facebook login, Google login and login via Microsoft Office 365 Account)" optional = true python-versions = "<4.0,>=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "fastapi_sso-0.16.0-py3-none-any.whl", hash = "sha256:3a66a942474ef9756d3a9d8b945d55bd9faf99781facdb9b87a40b73d6d6b0c3"}, {file = "fastapi_sso-0.16.0.tar.gz", hash = "sha256:f3941f986347566b7d3747c710cf474a907f581bfb6697ff3bb3e44eb76b438c"}, @@ -790,6 +837,7 @@ version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, @@ -806,6 +854,7 @@ version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" +groups = ["dev"] files = [ {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, @@ -822,6 +871,7 @@ version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, @@ -923,6 +973,7 @@ version = "2025.3.0" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"}, {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"}, @@ -962,6 +1013,8 @@ version = "2.24.2" description = "Google API client core library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_api_core-2.24.2-py3-none-any.whl", hash = "sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9"}, {file = "google_api_core-2.24.2.tar.gz", hash = "sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696"}, @@ -997,6 +1050,8 @@ version = "2.38.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, @@ -1021,6 +1076,8 @@ version = "2.24.2" description = "Google Cloud Kms API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_cloud_kms-2.24.2-py2.py3-none-any.whl", hash = "sha256:368209b035dfac691a467c1cf50986d8b1b26cac1166bdfbaa25d738df91ff7b"}, {file = "google_cloud_kms-2.24.2.tar.gz", hash = "sha256:e9e18bbfafd1a4035c76c03fb5ff03f4f57f596d08e1a9ede7e69ec0151b27a1"}, @@ -1039,6 +1096,8 @@ version = "1.69.2" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "googleapis_common_protos-1.69.2-py3-none-any.whl", hash = "sha256:0b30452ff9c7a27d80bfc5718954063e8ab53dd3697093d3bc99581f5fd24212"}, {file = "googleapis_common_protos-1.69.2.tar.gz", hash = "sha256:3e1b904a27a33c821b4b749fd31d334c0c9c30e6113023d495e48979a3dc9c5f"}, @@ -1057,6 +1116,8 @@ version = "0.14.2" description = "IAM API client 
library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpc_google_iam_v1-0.14.2-py3-none-any.whl", hash = "sha256:a3171468459770907926d56a440b2bb643eec1d7ba215f48f3ecece42b4d8351"}, {file = "grpc_google_iam_v1-0.14.2.tar.gz", hash = "sha256:b3e1fc387a1a329e41672197d0ace9de22c78dd7d215048c4c78712073f7bd20"}, @@ -1073,6 +1134,8 @@ version = "1.70.0" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"}, {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"}, @@ -1134,75 +1197,14 @@ files = [ [package.extras] protobuf = ["grpcio-tools (>=1.70.0)"] -[[package]] -name = "grpcio" -version = "1.71.0" -description = "HTTP/2-based RPC framework" -optional = true -python-versions = ">=3.9" -files = [ - {file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"}, - {file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"}, - {file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"}, - {file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"}, - {file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"}, - {file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"}, - {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"}, - {file = 
"grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"}, - {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"}, - {file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"}, - {file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"}, - {file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"}, - {file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"}, - {file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"}, - {file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"}, - {file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"}, - {file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"}, - {file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"}, - {file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"}, - {file = 
"grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"}, - {file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"}, - {file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"}, - {file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"}, - {file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.71.0)"] - [[package]] name = "grpcio-status" version = "1.70.0" description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"}, {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"}, @@ -1213,31 +1215,17 @@ googleapis-common-protos = ">=1.5.5" grpcio = ">=1.70.0" protobuf = ">=5.26.1,<6.0dev" -[[package]] -name = "grpcio-status" -version = "1.71.0" -description = "Status proto mapping for gRPC" -optional = true -python-versions = ">=3.9" -files = [ - {file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"}, - {file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.71.0" -protobuf = ">=5.26.1,<6.0dev" - [[package]] name = "gunicorn" -version = "22.0.0" +version = "23.0.0" description = "WSGI HTTP Server for UNIX" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ - {file = "gunicorn-22.0.0-py3-none-any.whl", hash = "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9"}, - {file = "gunicorn-22.0.0.tar.gz", hash = "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"}, + {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"}, + {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"}, ] [package.dependencies] @@ -1256,6 +1244,7 @@ version = "0.14.0" description = "A pure-Python, 
bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1267,6 +1256,7 @@ version = "1.0.7" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -1288,6 +1278,7 @@ version = "0.28.1" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -1312,6 +1303,7 @@ version = "0.29.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ {file = "huggingface_hub-0.29.3-py3-none-any.whl", hash = "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa"}, {file = "huggingface_hub-0.29.3.tar.gz", hash = "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5"}, @@ -1346,6 +1338,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1360,6 +1353,7 @@ version = "8.5.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, @@ -1383,6 +1377,8 @@ version = "6.4.5" description = "Read resources from Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version < \"3.9\"" files = [ {file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"}, {file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"}, @@ -1405,6 +1401,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -1416,6 +1413,8 @@ version = "0.7.2" description = "An ISO 8601 date/time/duration parser and formatter" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == 
\"extra-proxy\"" files = [ {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, @@ -1427,6 +1426,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -1444,6 +1444,7 @@ version = "0.9.0" description = "Fast iterable JSON parser." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"}, {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"}, @@ -1529,6 +1530,8 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -1540,6 +1543,7 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -1563,6 +1567,7 @@ version = "2023.12.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, @@ -1578,6 +1583,7 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1647,6 +1653,7 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -1658,6 +1665,8 @@ version = "1.32.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "msal-1.32.0-py3-none-any.whl", hash = "sha256:9dbac5384a10bbbf4dae5c7ea0d707d14e087b92c5aa4954b3feaa2d1aa0bcb7"}, {file = "msal-1.32.0.tar.gz", hash = "sha256:5445fe3af1da6be484991a7ab32eaa82461dc2347de105b76af92c610c3335c2"}, @@ -1677,6 +1686,8 @@ version = "1.3.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "msal_extensions-1.3.0-py3-none-any.whl", hash = "sha256:105328ddcbdd342016c9949d8f89e3917554740c8ab26669c0fa0e069e730a0e"}, {file = "msal_extensions-1.3.0.tar.gz", hash = "sha256:96918996642b38c78cd59b55efa0f06fd1373c90e0949be8615697c048fba62c"}, @@ -1694,6 +1705,7 @@ version = "6.1.0" description = "multidict implementation" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, @@ -1798,6 +1810,7 @@ version = "1.14.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb"}, {file = "mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0"}, @@ -1857,6 +1870,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1868,6 +1882,8 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -1879,6 +1895,8 @@ version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, @@ -1895,6 +1913,7 @@ version = "1.66.3" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9"}, {file = "openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9"}, @@ -1920,6 +1939,8 @@ version = "3.10.15" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, @@ -2008,6 +2029,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -2019,6 +2041,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2030,6 +2053,8 @@ version = "1.3.10" description = "Resolve a name to an object." 
optional = false python-versions = ">=3.6" +groups = ["main"] +markers = "python_version < \"3.9\"" files = [ {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, @@ -2041,6 +2066,7 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -2057,6 +2083,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2072,6 +2099,8 @@ version = "0.11.0" description = "Prisma Client Python is an auto-generated and fully type-safe database client" optional = true python-versions = ">=3.7.0" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "prisma-0.11.0-py3-none-any.whl", hash = "sha256:22bb869e59a2968b99f3483bb417717273ffbc569fd1e9ceed95e5614cbaf53a"}, {file = "prisma-0.11.0.tar.gz", hash = "sha256:3f2f2fd2361e1ec5ff655f2a04c7860c2f2a5bc4c91f78ca9c5c6349735bf693"}, @@ -2097,6 +2126,7 @@ version = "0.2.0" description = "Accelerated property cache" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, @@ -2204,6 +2234,8 @@ version = "1.26.1" description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -2221,6 +2253,8 @@ version = "5.29.3" description = "" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"}, {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"}, @@ -2241,6 +2275,8 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = 
"sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -2252,6 +2288,8 @@ version = "0.4.1" description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, @@ -2266,6 +2304,7 @@ version = "2.11.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, @@ -2277,6 +2316,8 @@ version = "2.22" description = "C parser in Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -2288,6 +2329,7 @@ version = "2.10.6" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, @@ -2309,6 +2351,7 @@ version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, @@ -2421,6 +2464,7 @@ version = "3.1.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, @@ -2432,6 +2476,8 @@ version = "2.9.0" description = "JSON Web Token implementation in Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\"" files = [ {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, @@ -2452,6 +2498,8 @@ version = "1.5.0" description = "Python binding to the Networking and Cryptography (NaCl) library" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = 
"sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, @@ -2478,6 +2526,7 @@ version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, @@ -2500,6 +2549,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2517,6 +2567,8 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2531,6 +2583,7 @@ version = "1.0.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -2545,6 +2598,8 @@ version = "0.0.18" description = "A streaming multipart parser for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "python_multipart-0.0.18-py3-none-any.whl", hash = "sha256:efe91480f485f6a361427a541db4796f9e1591afc0fb8e7a4ba06bfbc6708996"}, {file = "python_multipart-0.0.18.tar.gz", hash = "sha256:7a68db60c8bfb82e460637fa4750727b45af1d5e2ed215593f917f64694d34fe"}, @@ -2556,6 +2611,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2618,6 +2674,8 @@ version = "5.2.1" description = "Python client for Redis database and key-value store" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"}, {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"}, @@ -2636,6 +2694,7 @@ version = "0.35.1" description = "JSON Referencing + Python" optional = false python-versions = 
">=3.8" +groups = ["main"] files = [ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, @@ -2651,6 +2710,7 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -2754,6 +2814,7 @@ version = "2.31.0" description = "Python HTTP for Humans." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, @@ -2775,6 +2836,8 @@ version = "0.8.0" description = "Resend Python SDK" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "resend-0.8.0-py2.py3-none-any.whl", hash = "sha256:adc1515dadf4f4fc6b90db55a237f0f37fc56fd74287a986519a8a187fdb661d"}, {file = "resend-0.8.0.tar.gz", hash = "sha256:94142394701724dbcfcd8f760f675c662a1025013e741dd7cc773ca885526257"}, @@ -2789,6 +2852,7 @@ version = "0.20.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"}, {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"}, @@ -2901,6 +2965,8 @@ version = "2.1.0" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "rq-2.1.0-py3-none-any.whl", hash = "sha256:3c6892c6ca848e5fb47c1875399a66f13656bf0e123bf725d9aa9a12718e2fdf"}, {file = "rq-2.1.0.tar.gz", hash = "sha256:764585b6cab69ef1412f4aee523347e5aa7ece3ca175c118b1d92223dd8c2826"}, @@ -2916,6 +2982,8 @@ version = "4.9" description = "Pure-Python RSA implementation" optional = true python-versions = ">=3.6,<4" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, @@ -2930,6 +2998,8 @@ version = "0.10.4" description = "An Amazon S3 Transfer Manager" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"}, {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"}, @@ -2947,6 +3017,8 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +markers = "extra == \"extra-proxy\" or extra == \"proxy\"" files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -2958,6 +3030,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2969,6 +3042,8 @@ version = "0.44.0" description = "The little ASGI library that shines." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "starlette-0.44.0-py3-none-any.whl", hash = "sha256:19edeb75844c16dcd4f9dd72f22f9108c1539f3fc9c4c88885654fef64f85aea"}, {file = "starlette-0.44.0.tar.gz", hash = "sha256:e35166950a3ccccc701962fe0711db0bc14f2ecd37c6f9fe5e3eae0cbaea8715"}, @@ -2987,6 +3062,7 @@ version = "0.7.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, @@ -3039,6 +3115,7 @@ version = "0.21.0" description = "" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"}, {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"}, @@ -3071,6 +3148,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -3112,6 +3191,8 @@ version = "0.13.2" description = "Style preserving TOML library" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, @@ -3123,6 +3204,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -3144,6 +3226,7 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -3155,6 +3238,8 @@ version = "2025.1" description = "Provider of IANA time zone data" optional = true python-versions = ">=2" +groups = ["main"] +markers = "extra == \"proxy\" and platform_system == \"Windows\"" files = [ {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, @@ -3166,6 +3251,8 @@ version = "5.2" description = "tzinfo object for the local 
timezone" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, @@ -3184,6 +3271,8 @@ version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main"] +markers = "python_version < \"3.10\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, @@ -3200,6 +3289,8 @@ version = "2.2.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version >= \"3.10\"" files = [ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, @@ -3217,6 +3308,8 @@ version = "0.29.0" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "uvicorn-0.29.0-py3-none-any.whl", hash = "sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de"}, {file = "uvicorn-0.29.0.tar.gz", hash = "sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0"}, @@ -3236,6 +3329,8 @@ version = "0.21.0" description = "Fast implementation of asyncio event loop on top of libuv" optional = true python-versions = ">=3.8.0" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, @@ -3287,6 +3382,8 @@ version = "13.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee"}, {file = "websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7"}, @@ -3382,6 +3479,7 @@ version = "1.15.2" description = "Yet another URL library" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e4ee8b8639070ff246ad3649294336b06db37a94bdea0d09ea491603e0be73b8"}, {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7cf963a357c5f00cb55b1955df8bbe68d2f2f65de065160a1c26b85a1e44172"}, @@ -3494,6 +3592,7 @@ version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "zipp-3.20.2-py3-none-any.whl", hash = 
"sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, @@ -3512,6 +3611,6 @@ extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", " proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "pynacl", "python-multipart", "pyyaml", "rq", "uvicorn", "uvloop", "websockets"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "b9daad0a009079f7bf9c520525e2f9d0ea7ade51a1c598b88e23d6d590ef44be" +content-hash = "55078af47c1af79bd3ebadacb7ba92844d550a577bb0c49f5096693701ea4322" diff --git a/pyproject.toml b/pyproject.toml index 38d5687800..208804c562 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.63.12" +version = "1.63.14" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -34,7 +34,7 @@ jsonschema = "^4.22.0" uvicorn = {version = "^0.29.0", optional = true} uvloop = {version = "^0.21.0", optional = true} -gunicorn = {version = "^22.0.0", optional = true} +gunicorn = {version = "^23.0.0", optional = true} fastapi = {version = "^0.115.5", optional = true} backoff = {version = "*", optional = true} pyyaml = {version = "^6.0.1", optional = true} @@ -100,7 +100,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.63.12" +version = "1.63.14" version_files = [ "pyproject.toml:^version" ] diff --git a/requirements.txt b/requirements.txt index abe021c709..2e3715e55e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ # LITELLM PROXY DEPENDENCIES # anyio==4.5.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency -openai==1.66.1 # openai req. +openai==1.68.2 # openai req. 
fastapi==0.115.5 # server dep backoff==2.2.1 # server dep pyyaml==6.0.2 # server dep uvicorn==0.29.0 # server dep -gunicorn==22.0.0 # server dep +gunicorn==23.0.0 # server dep uvloop==0.21.0 # uvicorn dep, gives us much better performance under load boto3==1.34.34 # aws bedrock/sagemaker calls redis==5.0.0 # caching diff --git a/tests/litellm/caching/test_in_memory_cache.py b/tests/litellm/caching/test_in_memory_cache.py new file mode 100644 index 0000000000..d69899fec1 --- /dev/null +++ b/tests/litellm/caching/test_in_memory_cache.py @@ -0,0 +1,45 @@ +import asyncio +import json +import os +import sys +import time +from unittest.mock import MagicMock, patch + +import httpx +import pytest +import respx +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path +from unittest.mock import AsyncMock + +from litellm.caching.in_memory_cache import InMemoryCache + + +def test_in_memory_openai_obj_cache(): + from openai import OpenAI + + openai_obj = OpenAI(api_key="my-fake-key") + + in_memory_cache = InMemoryCache() + + in_memory_cache.set_cache(key="my-fake-key", value=openai_obj) + + cached_obj = in_memory_cache.get_cache(key="my-fake-key") + + assert cached_obj is not None + + assert cached_obj == openai_obj + + +def test_in_memory_cache_max_size_per_item(): + """ + Test that the cache will not store items larger than the max size per item + """ + in_memory_cache = InMemoryCache(max_size_per_item=100) + + result = in_memory_cache.check_value_size("a" * 100000000) + + assert result is False diff --git a/tests/litellm/experimental_mcp_client/test_tools.py b/tests/litellm/experimental_mcp_client/test_tools.py new file mode 100644 index 0000000000..7089d83217 --- /dev/null +++ b/tests/litellm/experimental_mcp_client/test_tools.py @@ -0,0 +1,157 @@ +import json +import os +import sys +from unittest.mock import AsyncMock, MagicMock + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from mcp.types import ( + CallToolRequestParams, + CallToolResult, + ListToolsResult, + TextContent, +) +from mcp.types import Tool as MCPTool + +from litellm.experimental_mcp_client.tools import ( + _get_function_arguments, + _transform_openai_tool_call_to_mcp_tool_call_request, + call_mcp_tool, + call_openai_tool, + load_mcp_tools, + transform_mcp_tool_to_openai_tool, +) + + +@pytest.fixture +def mock_mcp_tool(): + return MCPTool( + name="test_tool", + description="A test tool", + inputSchema={"type": "object", "properties": {"test": {"type": "string"}}}, + ) + + +@pytest.fixture +def mock_session(): + session = MagicMock() + session.list_tools = AsyncMock() + session.call_tool = AsyncMock() + return session + + +@pytest.fixture +def mock_list_tools_result(): + return ListToolsResult( + tools=[ + MCPTool( + name="test_tool", + description="A test tool", + inputSchema={ + "type": "object", + "properties": {"test": {"type": "string"}}, + }, + ) + ] + ) + + +@pytest.fixture +def mock_mcp_tool_call_result(): + return CallToolResult(content=[TextContent(type="text", text="test_output")]) + + +def test_transform_mcp_tool_to_openai_tool(mock_mcp_tool): + openai_tool = transform_mcp_tool_to_openai_tool(mock_mcp_tool) + assert openai_tool["type"] == "function" + assert openai_tool["function"]["name"] == "test_tool" + assert openai_tool["function"]["description"] == "A test tool" + assert openai_tool["function"]["parameters"] == { + "type": "object", + "properties": 
{"test": {"type": "string"}}, + } + + +def test_transform_openai_tool_call_to_mcp_tool_call_request(mock_mcp_tool): + openai_tool = { + "function": {"name": "test_tool", "arguments": json.dumps({"test": "value"})} + } + mcp_tool_call_request = _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool + ) + assert mcp_tool_call_request.name == "test_tool" + assert mcp_tool_call_request.arguments == {"test": "value"} + + +@pytest.mark.asyncio() +async def test_load_mcp_tools_mcp_format(mock_session, mock_list_tools_result): + mock_session.list_tools.return_value = mock_list_tools_result + result = await load_mcp_tools(mock_session, format="mcp") + assert len(result) == 1 + assert isinstance(result[0], MCPTool) + assert result[0].name == "test_tool" + mock_session.list_tools.assert_called_once() + + +@pytest.mark.asyncio() +async def test_load_mcp_tools_openai_format(mock_session, mock_list_tools_result): + mock_session.list_tools.return_value = mock_list_tools_result + result = await load_mcp_tools(mock_session, format="openai") + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "test_tool" + mock_session.list_tools.assert_called_once() + + +def test_get_function_arguments(): + # Test with string arguments + function = {"arguments": '{"test": "value"}'} + result = _get_function_arguments(function) + assert result == {"test": "value"} + + # Test with dict arguments + function = {"arguments": {"test": "value"}} + result = _get_function_arguments(function) + assert result == {"test": "value"} + + # Test with invalid JSON string + function = {"arguments": "invalid json"} + result = _get_function_arguments(function) + assert result == {} + + # Test with no arguments + function = {} + result = _get_function_arguments(function) + assert result == {} + + +@pytest.mark.asyncio() +async def test_call_openai_tool(mock_session, mock_mcp_tool_call_result): + mock_session.call_tool.return_value = mock_mcp_tool_call_result + openai_tool = { + "function": {"name": "test_tool", "arguments": json.dumps({"test": "value"})} + } + result = await call_openai_tool(mock_session, openai_tool) + print("result of call_openai_tool", result) + assert result.content[0].text == "test_output" + mock_session.call_tool.assert_called_once_with( + name="test_tool", arguments={"test": "value"} + ) + + +@pytest.mark.asyncio() +async def test_call_mcp_tool(mock_session, mock_mcp_tool_call_result): + mock_session.call_tool.return_value = mock_mcp_tool_call_result + request_params = CallToolRequestParams( + name="test_tool", arguments={"test": "value"} + ) + result = await call_mcp_tool(mock_session, request_params) + print("call_mcp_tool result", result) + assert result.content[0].text == "test_output" + mock_session.call_tool.assert_called_once_with( + name="test_tool", arguments={"test": "value"} + ) diff --git a/tests/litellm/litellm_core_utils/test_streaming_handler.py b/tests/litellm/litellm_core_utils/test_streaming_handler.py index 75c4fc1035..988d533670 100644 --- a/tests/litellm/litellm_core_utils/test_streaming_handler.py +++ b/tests/litellm/litellm_core_utils/test_streaming_handler.py @@ -136,6 +136,40 @@ def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapp ) +def test_is_chunk_non_empty_with_annotations( + initialized_custom_stream_wrapper: CustomStreamWrapper, +): + """Unit test if non-empty when annotations are present""" + chunk = { + "id": "e89b6501-8ac2-464c-9550-7cd3daf94350", + "object": "chat.completion.chunk", + 
"created": 1741037890, + "model": "deepseek-reasoner", + "system_fingerprint": "fp_5417b77867_prod0225", + "choices": [ + { + "index": 0, + "delta": { + "content": None, + "annotations": [ + {"type": "url_citation", "url": "https://www.google.com"} + ], + }, + "logprobs": None, + "finish_reason": None, + } + ], + } + assert ( + initialized_custom_stream_wrapper.is_chunk_non_empty( + completion_obj=MagicMock(), + model_response=ModelResponseStream(**chunk), + response_obj=MagicMock(), + ) + is True + ) + + def test_optional_combine_thinking_block_in_choices( initialized_custom_stream_wrapper: CustomStreamWrapper, ): diff --git a/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py b/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py new file mode 100644 index 0000000000..04f2728284 --- /dev/null +++ b/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py @@ -0,0 +1,35 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path +from unittest.mock import MagicMock, patch + +from litellm.llms.anthropic.chat.transformation import AnthropicConfig + + +def test_response_format_transformation_unit_test(): + config = AnthropicConfig() + + response_format_json_schema = { + "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. Example: "Learning about home automation"', + "properties": {"agent_doing": {"title": "Agent Doing", "type": "string"}}, + "required": ["agent_doing"], + "title": "ThinkingStep", + "type": "object", + "additionalProperties": False, + } + + result = config._create_json_tool_call_for_response_format( + json_schema=response_format_json_schema + ) + + assert result["input_schema"]["properties"] == { + "agent_doing": {"title": "Agent Doing", "type": "string"} + } + print(result) diff --git a/tests/litellm/llms/custom_httpx/test_llm_http_handler.py b/tests/litellm/llms/custom_httpx/test_llm_http_handler.py new file mode 100644 index 0000000000..26fc18de16 --- /dev/null +++ b/tests/litellm/llms/custom_httpx/test_llm_http_handler.py @@ -0,0 +1,77 @@ +import io +import os +import pathlib +import ssl +import sys +from unittest.mock import MagicMock + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path +import litellm +from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler + + +def test_prepare_fake_stream_request(): + # Initialize the BaseLLMHTTPHandler + handler = BaseLLMHTTPHandler() + + # Test case 1: fake_stream is True + stream = True + data = { + "stream": True, + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + } + fake_stream = True + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream is set to False + assert result_stream is False + # Verify that "stream" key is removed from data + assert "stream" not in result_data + # Verify other data remains unchanged + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] + + # Test case 2: 
fake_stream is False + stream = True + data = { + "stream": True, + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + } + fake_stream = False + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream remains True + assert result_stream is True + # Verify that data remains unchanged + assert "stream" in result_data + assert result_data["stream"] is True + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] + + # Test case 3: data doesn't have stream key but fake_stream is True + stream = True + data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + fake_stream = True + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream is set to False + assert result_stream is False + # Verify that data remains unchanged (since there was no stream key to remove) + assert "stream" not in result_data + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] diff --git a/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py b/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py new file mode 100644 index 0000000000..e89355443f --- /dev/null +++ b/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py @@ -0,0 +1,43 @@ +import os +import sys +from unittest.mock import MagicMock, call, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +import litellm +from litellm.llms.vertex_ai.common_utils import ( + get_vertex_location_from_url, + get_vertex_project_id_from_url, +) + + +@pytest.mark.asyncio +async def test_get_vertex_project_id_from_url(): + """Test _get_vertex_project_id_from_url with various URLs""" + # Test with valid URL + url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + project_id = get_vertex_project_id_from_url(url) + assert project_id == "test-project" + + # Test with invalid URL + url = "https://invalid-url.com" + project_id = get_vertex_project_id_from_url(url) + assert project_id is None + + +@pytest.mark.asyncio +async def test_get_vertex_location_from_url(): + """Test _get_vertex_location_from_url with various URLs""" + # Test with valid URL + url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + location = get_vertex_location_from_url(url) + assert location == "us-central1" + + # Test with invalid URL + url = "https://invalid-url.com" + location = get_vertex_location_from_url(url) + assert location is None diff --git a/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py new file mode 100644 index 0000000000..697be8b3c9 --- /dev/null +++ b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py @@ -0,0 +1,57 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path + +from litellm.proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth +from 
litellm.proxy.management_endpoints.internal_user_endpoints import ui_view_users +from litellm.proxy.proxy_server import app + +client = TestClient(app) + + +@pytest.mark.asyncio +async def test_ui_view_users_with_null_email(mocker, caplog): + """ + Test that /user/filter/ui endpoint returns users even when they have null email fields + """ + # Mock the prisma client + mock_prisma_client = mocker.MagicMock() + + # Create mock user data with null email + mock_user = mocker.MagicMock() + mock_user.model_dump.return_value = { + "user_id": "test-user-null-email", + "user_email": None, + "user_role": "proxy_admin", + "created_at": "2024-01-01T00:00:00Z", + } + + # Setup the mock find_many response as an async function + async def mock_find_many(*args, **kwargs): + return [mock_user] + + mock_prisma_client.db.litellm_usertable.find_many = mock_find_many + + # Patch the prisma client import in the endpoint + mocker.patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client) + + # Call ui_view_users function directly + response = await ui_view_users( + user_api_key_dict=UserAPIKeyAuth(user_id="test_user"), + user_id="test_user", + user_email=None, + page=1, + page_size=50, + ) + + assert response == [ + LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None) + ] diff --git a/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py index 2f5ce85de7..da08dea605 100644 --- a/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py +++ b/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py @@ -1,7 +1,9 @@ import json import os import sys -from unittest.mock import MagicMock, patch +import traceback +from unittest import mock +from unittest.mock import AsyncMock, MagicMock, Mock, patch import httpx import pytest @@ -17,7 +19,9 @@ from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( BaseOpenAIPassThroughHandler, RouteChecks, create_pass_through_route, + vertex_proxy_route, ) +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials class TestBaseOpenAIPassThroughHandler: @@ -176,3 +180,279 @@ class TestBaseOpenAIPassThroughHandler: print(f"query_params: {call_kwargs['query_params']}") assert call_kwargs["stream"] is False assert call_kwargs["query_params"] == {"model": "gpt-4"} + + +class TestVertexAIPassThroughHandler: + """ + Case 1: User set passthrough credentials - confirm credentials used. + + Case 2: User set default credentials, no exact passthrough credentials - confirm default credentials used. + + Case 3: No default credentials, no mapped credentials - request passed through directly.
+ """ + + @pytest.mark.asyncio + async def test_vertex_passthrough_with_credentials(self, monkeypatch): + """ + Test that when passthrough credentials are set, they are correctly used in the request + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + vertex_project = "test-project" + vertex_location = "us-central1" + vertex_credentials = "test-creds" + + pass_through_router = PassthroughEndpointRouter() + + pass_through_router.add_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + + endpoint = f"/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/gemini-1.5-flash:generateContent" + + # Mock request + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": endpoint, + "headers": [ + (b"Authorization", b"Bearer test-creds"), + (b"Content-Type", b"application/json"), + ], + } + ) + + # Mock response + mock_response = Response() + + # Mock vertex credentials + test_project = vertex_project + test_location = vertex_location + test_token = vertex_credentials + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", test_project) + mock_get_token.return_value = (test_token, "") + + # Call the route + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + print(f"Error: {e}") + + # Verify create_pass_through_route was called with correct arguments + mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{test_location}-aiplatform.googleapis.com/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"Authorization": f"Bearer {test_token}"}, + ) + + @pytest.mark.parametrize( + "initial_endpoint", + [ + "publishers/google/models/gemini-1.5-flash:generateContent", + "v1/projects/bad-project/locations/bad-location/publishers/google/models/gemini-1.5-flash:generateContent", + ], + ) + @pytest.mark.asyncio + async def test_vertex_passthrough_with_default_credentials( + self, monkeypatch, initial_endpoint + ): + """ + Test that when no passthrough credentials are set, default credentials are used in the request + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + # Setup default credentials + default_project = "default-project" + default_location = "us-central1" + default_credentials = "default-creds" + + pass_through_router = PassthroughEndpointRouter() + pass_through_router.default_vertex_config = VertexPassThroughCredentials( + vertex_project=default_project, + vertex_location=default_location, + vertex_credentials=default_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + 
+ # Use different project/location in request than the default + endpoint = initial_endpoint + + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": f"/vertex_ai/{endpoint}", + "headers": {}, + } + ) + mock_response = Response() + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", default_project) + mock_get_token.return_value = (default_credentials, "") + + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + traceback.print_exc() + print(f"Error: {e}") + + # Verify default credentials were used + mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{default_location}-aiplatform.googleapis.com/v1/projects/{default_project}/locations/{default_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"Authorization": f"Bearer {default_credentials}"}, + ) + + @pytest.mark.asyncio + async def test_vertex_passthrough_with_no_default_credentials(self, monkeypatch): + """ + Test that when no default credentials and no matching mapped credentials are set, + the request is passed through using the project/location from the request path. + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + vertex_project = "my-project" + vertex_location = "us-central1" + vertex_credentials = "test-creds" + + test_project = "test-project" + test_location = "test-location" + test_token = "test-creds" + + pass_through_router = PassthroughEndpointRouter() + + pass_through_router.add_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + + endpoint = f"/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent" + + # Mock request + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": endpoint, + "headers": [ + (b"authorization", b"Bearer test-creds"), + ], + } + ) + + # Mock response + mock_response = Response() + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", test_project) + mock_get_token.return_value = (test_token, "") + + # Call the route + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + traceback.print_exc() + print(f"Error: {e}") + + # Verify create_pass_through_route was called with correct arguments +
mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{test_location}-aiplatform.googleapis.com/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"authorization": f"Bearer {test_token}"}, + ) + + @pytest.mark.asyncio + async def test_async_vertex_proxy_route_api_key_auth(self): + """ + Critical + + This is how Vertex AI JS SDK will Auth to Litellm Proxy + """ + # Mock dependencies + mock_request = Mock() + mock_request.headers = {"x-litellm-api-key": "test-key-123"} + mock_request.method = "POST" + mock_response = Mock() + + with patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" + ) as mock_auth: + mock_auth.return_value = {"api_key": "test-key-123"} + + with patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_pass_through: + mock_pass_through.return_value = AsyncMock( + return_value={"status": "success"} + ) + + # Call the function + result = await vertex_proxy_route( + endpoint="v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent", + request=mock_request, + fastapi_response=mock_response, + ) + + # Verify user_api_key_auth was called with the correct Bearer token + mock_auth.assert_called_once() + call_args = mock_auth.call_args[1] + assert call_args["api_key"] == "Bearer test-key-123" diff --git a/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py new file mode 100644 index 0000000000..bd8c5f5a99 --- /dev/null +++ b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py @@ -0,0 +1,44 @@ +import json +import os +import sys +import traceback +from unittest import mock +from unittest.mock import MagicMock, patch + +import httpx +import pytest +from fastapi import Request, Response +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path + +from unittest.mock import Mock + +from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key + + +@pytest.mark.asyncio +async def test_get_litellm_virtual_key(): + """ + Test that the get_litellm_virtual_key function correctly handles the API key authentication + """ + # Test with x-litellm-api-key + mock_request = Mock() + mock_request.headers = {"x-litellm-api-key": "test-key-123"} + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer test-key-123" + + # Test with Authorization header + mock_request.headers = {"Authorization": "Bearer auth-key-456"} + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer auth-key-456" + + # Test with both headers (x-litellm-api-key should take precedence) + mock_request.headers = { + "x-litellm-api-key": "test-key-123", + "Authorization": "Bearer auth-key-456", + } + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer test-key-123" diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py index 37674551fe..677e13b08a 100644 --- a/tests/llm_responses_api_testing/test_openai_responses_api.py +++ b/tests/llm_responses_api_testing/test_openai_responses_api.py @@ -94,7 +94,7 @@ def validate_responses_api_response(response, final_chunk: bool = False): 
@pytest.mark.asyncio async def test_basic_openai_responses_api(sync_mode): litellm._turn_on_debug() - + litellm.set_verbose = True if sync_mode: response = litellm.responses( model="gpt-4o", input="Basic ping", max_output_tokens=20 @@ -826,3 +826,219 @@ async def test_async_bad_request_bad_param_error(): print(f"Exception details: {e.__dict__}") except Exception as e: pytest.fail(f"Unexpected exception raised: {e}") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync_mode", [True, False]) +async def test_openai_o1_pro_response_api(sync_mode): + """ + Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model + due to reaching max_output_tokens limit. + """ + # Mock response from o1-pro + mock_response = { + "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88", + "object": "response", + "created_at": 1742486999, + "status": "incomplete", + "error": None, + "incomplete_details": {"reason": "max_output_tokens"}, + "instructions": None, + "max_output_tokens": 20, + "model": "o1-pro-2025-03-19", + "output": [ + { + "type": "reasoning", + "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88", + "summary": [], + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": {"effort": "medium", "generate_summary": None}, + "store": True, + "temperature": 1.0, + "text": {"format": {"type": "text"}}, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 73, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens": 20, + "output_tokens_details": {"reasoning_tokens": 0}, + "total_tokens": 93, + }, + "user": None, + "metadata": {}, + } + + class MockResponse: + def __init__(self, json_data, status_code): + self._json_data = json_data + self.status_code = status_code + self.text = json.dumps(json_data) + + def json(self): # Changed from async to sync + return self._json_data + + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + new_callable=AsyncMock, + ) as mock_post: + # Configure the mock to return our response + mock_post.return_value = MockResponse(mock_response, 200) + + litellm._turn_on_debug() + litellm.set_verbose = True + + # Call o1-pro with max_output_tokens=20 + response = await litellm.aresponses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + ) + + # Verify the request was made correctly + mock_post.assert_called_once() + request_body = json.loads(mock_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + + # Validate the response + print("Response:", json.dumps(response, indent=4, default=str)) + + # Check that the response has the expected structure + assert response["id"] == mock_response["id"] + assert response["status"] == "incomplete" + assert response["incomplete_details"].reason == "max_output_tokens" + assert response["max_output_tokens"] == 20 + + # Validate usage information + assert response["usage"]["input_tokens"] == 73 + assert response["usage"]["output_tokens"] == 20 + assert response["usage"]["total_tokens"] == 93 + + # Validate that the response is properly identified as incomplete + validate_responses_api_response(response, final_chunk=True) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync_mode", [True, False]) +async def test_openai_o1_pro_response_api_streaming(sync_mode): + """ + Test that LiteLLM correctly handles an incomplete 
response from OpenAI's o1-pro model + due to reaching max_output_tokens limit in both sync and async streaming modes. + """ + # Mock response from o1-pro + mock_response = { + "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88", + "object": "response", + "created_at": 1742486999, + "status": "incomplete", + "error": None, + "incomplete_details": {"reason": "max_output_tokens"}, + "instructions": None, + "max_output_tokens": 20, + "model": "o1-pro-2025-03-19", + "output": [ + { + "type": "reasoning", + "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88", + "summary": [], + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": {"effort": "medium", "generate_summary": None}, + "store": True, + "temperature": 1.0, + "text": {"format": {"type": "text"}}, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 73, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens": 20, + "output_tokens_details": {"reasoning_tokens": 0}, + "total_tokens": 93, + }, + "user": None, + "metadata": {}, + } + + class MockResponse: + def __init__(self, json_data, status_code): + self._json_data = json_data + self.status_code = status_code + self.text = json.dumps(json_data) + + def json(self): + return self._json_data + + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + new_callable=AsyncMock, + ) as mock_post: + # Configure the mock to return our response + mock_post.return_value = MockResponse(mock_response, 200) + + litellm._turn_on_debug() + litellm.set_verbose = True + + # Verify the request was made correctly + if sync_mode: + # For sync mode, we need to patch the sync HTTP handler + with patch( + "litellm.llms.custom_httpx.http_handler.HTTPHandler.post", + return_value=MockResponse(mock_response, 200), + ) as mock_sync_post: + response = litellm.responses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + stream=True, + ) + + # Process the sync stream + event_count = 0 + for event in response: + print( + f"Sync litellm response #{event_count}:", + json.dumps(event, indent=4, default=str), + ) + event_count += 1 + + # Verify the sync request was made correctly + mock_sync_post.assert_called_once() + request_body = json.loads(mock_sync_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + assert "stream" not in request_body + else: + # For async mode + response = await litellm.aresponses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + stream=True, + ) + + # Process the async stream + event_count = 0 + async for event in response: + print( + f"Async litellm response #{event_count}:", + json.dumps(event, indent=4, default=str), + ) + event_count += 1 + + # Verify the async request was made correctly + mock_post.assert_called_once() + request_body = json.loads(mock_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + assert "stream" not in request_body diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 32f631daad..82a1ef40fb 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -20,6 +20,7 @@ from litellm.utils import ( 
get_optional_params, ProviderConfigManager, ) +from litellm.main import stream_chunk_builder from typing import Union # test_example.py @@ -338,7 +339,7 @@ class BaseLLMChatTest(ABC): @pytest.mark.flaky(retries=6, delay=1) def test_json_response_pydantic_obj(self): - litellm.set_verbose = True + litellm._turn_on_debug() from pydantic import BaseModel from litellm.utils import supports_response_schema @@ -995,3 +996,73 @@ class BaseOSeriesModelsTest(ABC): # test across azure/openai ), "temperature should not be in the request body" except Exception as e: pytest.fail(f"Error occurred: {e}") + + +class BaseAnthropicChatTest(ABC): + """ + Ensures consistent result across anthropic model usage + """ + + @abstractmethod + def get_base_completion_call_args(self) -> dict: + """Must return the base completion call args""" + pass + + @property + def completion_function(self): + return litellm.completion + + def test_anthropic_response_format_streaming_vs_non_streaming(self): + litellm.set_verbose = True + args = { + "messages": [ + { + "content": "Your goal is to summarize the previous agent's thinking process into short descriptions to let user better understand the research progress. If no information is available, just say generic phrase like 'Doing some research...' with the given output format. Make sure to adhere to the output format no matter what, even if you don't have any information or you are not allowed to respond to the given input information (then just say generic phrase like 'Doing some research...').", + "role": "system", + }, + { + "role": "user", + "content": "Here is the input data (previous agent's output): \n\n Let's try to refine our search further, focusing more on the technical aspects of home automation and home energy system management:", + }, + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "final_output", + "strict": True, + "schema": { + "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. 
Example: "Learning about home automation"', + "properties": { + "agent_doing": {"title": "Agent Doing", "type": "string"} + }, + "required": ["agent_doing"], + "title": "ThinkingStep", + "type": "object", + "additionalProperties": False, + }, + }, + }, + } + + base_completion_call_args = self.get_base_completion_call_args() + + response = self.completion_function( + **base_completion_call_args, **args, stream=True + ) + + chunks = [] + for chunk in response: + print(f"chunk: {chunk}") + chunks.append(chunk) + + print(f"chunks: {chunks}") + built_response = stream_chunk_builder(chunks=chunks) + + non_stream_response = self.completion_function( + **base_completion_call_args, **args, stream=False + ) + + assert ( + json.loads(built_response.choices[0].message.content).keys() + == json.loads(non_stream_response.choices[0].message.content).keys() + ), f"Got={json.loads(built_response.choices[0].message.content)}, Expected={json.loads(non_stream_response.choices[0].message.content)}" diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py index da47e745e7..a83d1d69e9 100644 --- a/tests/llm_translation/test_anthropic_completion.py +++ b/tests/llm_translation/test_anthropic_completion.py @@ -36,7 +36,7 @@ from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk from litellm.llms.anthropic.common_utils import process_anthropic_headers from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion from httpx import Headers -from base_llm_unit_tests import BaseLLMChatTest +from base_llm_unit_tests import BaseLLMChatTest, BaseAnthropicChatTest def streaming_format_tests(chunk: dict, idx: int): @@ -455,14 +455,15 @@ def test_create_json_tool_call_for_response_format(): _input_schema = tool.get("input_schema") assert _input_schema is not None assert _input_schema.get("type") == "object" - assert _input_schema.get("properties") == {"values": custom_schema} + assert _input_schema.get("name") == custom_schema["name"] + assert _input_schema.get("age") == custom_schema["age"] assert "additionalProperties" not in _input_schema from litellm import completion -class TestAnthropicCompletion(BaseLLMChatTest): +class TestAnthropicCompletion(BaseLLMChatTest, BaseAnthropicChatTest): def get_base_completion_call_args(self) -> dict: return {"model": "anthropic/claude-3-5-sonnet-20240620"} diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py index 172c946636..633ff76467 100644 --- a/tests/llm_translation/test_openai.py +++ b/tests/llm_translation/test_openai.py @@ -3,6 +3,7 @@ import os import sys from datetime import datetime from unittest.mock import AsyncMock, patch +from typing import Optional sys.path.insert( 0, os.path.abspath("../..") @@ -17,6 +18,10 @@ import litellm from litellm import Choices, Message, ModelResponse from base_llm_unit_tests import BaseLLMChatTest import asyncio +from litellm.types.llms.openai import ( + ChatCompletionAnnotation, + ChatCompletionAnnotationURLCitation, +) def test_openai_prediction_param(): @@ -391,3 +396,65 @@ def test_openai_chat_completion_streaming_handler_reasoning_content(): ) assert response.choices[0].delta.reasoning_content == "." 
+ + +def validate_response_url_citation(url_citation: ChatCompletionAnnotationURLCitation): + assert "end_index" in url_citation + assert "start_index" in url_citation + assert "url" in url_citation + + +def validate_web_search_annotations(annotations: ChatCompletionAnnotation): + """validates litellm response contains web search annotations""" + print("annotations: ", annotations) + assert annotations is not None + assert isinstance(annotations, list) + for annotation in annotations: + assert annotation["type"] == "url_citation" + url_citation: ChatCompletionAnnotationURLCitation = annotation["url_citation"] + validate_response_url_citation(url_citation) + + +def test_openai_web_search(): + """Makes a simple web search request and validates the response contains web search annotations and all expected fields are present""" + litellm._turn_on_debug() + response = litellm.completion( + model="openai/gpt-4o-search-preview", + messages=[ + { + "role": "user", + "content": "What was a positive news story from today?", + } + ], + ) + print("litellm response: ", response.model_dump_json(indent=4)) + message = response.choices[0].message + annotations: ChatCompletionAnnotation = message.annotations + validate_web_search_annotations(annotations) + + +def test_openai_web_search_streaming(): + """Makes a simple web search request and validates the response contains web search annotations and all expected fields are present""" + # litellm._turn_on_debug() + test_openai_web_search: Optional[ChatCompletionAnnotation] = None + response = litellm.completion( + model="openai/gpt-4o-search-preview", + messages=[ + { + "role": "user", + "content": "What was a positive news story from today?", + } + ], + stream=True, + ) + for chunk in response: + print("litellm response chunk: ", chunk) + if ( + hasattr(chunk.choices[0].delta, "annotations") + and chunk.choices[0].delta.annotations is not None + ): + test_openai_web_search = chunk.choices[0].delta.annotations + + # Assert this request has at-least one web search annotation + assert test_openai_web_search is not None + validate_web_search_annotations(test_openai_web_search) diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py index d71f3f7c24..1a0f6d7a8d 100644 --- a/tests/local_testing/test_get_model_info.py +++ b/tests/local_testing/test_get_model_info.py @@ -500,6 +500,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "supports_tool_choice": {"type": "boolean"}, "supports_video_input": {"type": "boolean"}, "supports_vision": {"type": "boolean"}, + "supports_web_search": {"type": "boolean"}, "tool_use_system_prompt_tokens": {"type": "number"}, "tpm": {"type": "number"}, "supported_endpoints": { @@ -518,6 +519,15 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): ], }, }, + "search_context_cost_per_query": { + "type": "object", + "properties": { + "search_context_size_low": {"type": "number"}, + "search_context_size_medium": {"type": "number"}, + "search_context_size_high": {"type": "number"}, + }, + "additionalProperties": False, + }, "supported_modalities": { "type": "array", "items": { diff --git a/tests/local_testing/test_sagemaker.py b/tests/local_testing/test_sagemaker.py index ba1ab11596..9c7161e4ae 100644 --- a/tests/local_testing/test_sagemaker.py +++ b/tests/local_testing/test_sagemaker.py @@ -8,7 +8,7 @@ from dotenv import load_dotenv load_dotenv() import io import os - +import litellm from test_streaming import streaming_format_tests sys.path.insert( @@ -96,26 +96,57 @@ 
async def test_completion_sagemaker_messages_api(sync_mode): litellm.set_verbose = True verbose_logger.setLevel(logging.DEBUG) print("testing sagemaker") + from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler + if sync_mode is True: - resp = litellm.completion( - model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", - messages=[ - {"role": "user", "content": "hi"}, - ], - temperature=0.2, - max_tokens=80, - ) - print(resp) + client = HTTPHandler() + with patch.object(client, "post") as mock_post: + try: + resp = litellm.completion( + model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + messages=[ + {"role": "user", "content": "hi"}, + ], + temperature=0.2, + max_tokens=80, + client=client, + ) + except Exception as e: + print(e) + mock_post.assert_called_once() + json_data = json.loads(mock_post.call_args.kwargs["data"]) + assert ( + json_data["model"] + == "huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245" + ) + assert json_data["messages"] == [{"role": "user", "content": "hi"}] + assert json_data["temperature"] == 0.2 + assert json_data["max_tokens"] == 80 + else: - resp = await litellm.acompletion( - model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", - messages=[ - {"role": "user", "content": "hi"}, - ], - temperature=0.2, - max_tokens=80, - ) - print(resp) + client = AsyncHTTPHandler() + with patch.object(client, "post") as mock_post: + try: + resp = await litellm.acompletion( + model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + messages=[ + {"role": "user", "content": "hi"}, + ], + temperature=0.2, + max_tokens=80, + client=client, + ) + except Exception as e: + print(e) + mock_post.assert_called_once() + json_data = json.loads(mock_post.call_args.kwargs["data"]) + assert ( + json_data["model"] + == "huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245" + ) + assert json_data["messages"] == [{"role": "user", "content": "hi"}] + assert json_data["temperature"] == 0.2 + assert json_data["max_tokens"] == 80 except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -125,7 +156,7 @@ async def test_completion_sagemaker_messages_api(sync_mode): @pytest.mark.parametrize( "model", [ - "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + # "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", "sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614", ], ) @@ -185,7 +216,7 @@ async def test_completion_sagemaker_stream(sync_mode, model): @pytest.mark.parametrize( "model", [ - "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + # "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", "sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614", ], ) diff --git a/tests/mcp_tests/mcp_server.py b/tests/mcp_tests/mcp_server.py new file mode 100644 index 0000000000..99a67edd02 --- /dev/null +++ b/tests/mcp_tests/mcp_server.py @@ -0,0 +1,20 @@ +# math_server.py +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("Math") + + +@mcp.tool() +def add(a: int, b: int) -> int: + """Add two numbers""" + return a + b + + +@mcp.tool() +def multiply(a: int, b: int) -> int: + """Multiply two numbers""" + return a * b + + +if __name__ == "__main__": + mcp.run(transport="stdio") diff --git a/tests/mcp_tests/test_mcp_litellm_client.py b/tests/mcp_tests/test_mcp_litellm_client.py new file mode 100644 index 0000000000..0f8fb7994a --- /dev/null +++ 
b/tests/mcp_tests/test_mcp_litellm_client.py @@ -0,0 +1,86 @@ +# Create server parameters for stdio connection +import os +import sys +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from litellm import experimental_mcp_client +import litellm +import pytest +import json + + +@pytest.mark.asyncio +async def test_mcp_agent(): + local_server_path = "./mcp_server.py" + ci_cd_server_path = "tests/mcp_tests/mcp_server.py" + server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your math_server.py file + args=[ci_cd_server_path], + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools( + session=session, format="openai" + ) + print("MCP TOOLS: ", tools) + + # Create and run the agent + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + tool_choice="required", + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) + # Add assertions to verify the response + assert llm_response["choices"][0]["message"]["tool_calls"] is not None + + assert ( + llm_response["choices"][0]["message"]["tool_calls"][0]["function"][ + "name" + ] + == "add" + ) + openai_tool = llm_response["choices"][0]["message"]["tool_calls"][0] + + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=openai_tool, + ) + print("CALL RESULT: ", call_result) + + # send the tool result to the LLM + messages.append(llm_response["choices"][0]["message"]) + messages.append( + { + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": openai_tool["id"], + } + ) + print("final messages: ", messages) + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print( + "FINAL LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str) + ) diff --git a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py index db0a647e41..cb9db00324 100644 --- a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py +++ b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py @@ -339,9 +339,6 @@ def test_pass_through_routes_support_all_methods(): from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_router, ) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - router as vertex_router, - ) # Expected HTTP methods expected_methods = {"GET", "POST", "PUT", "DELETE", "PATCH"} @@ -361,7 +358,6 @@ def test_pass_through_routes_support_all_methods(): # Check both routers check_router_methods(llm_router) - check_router_methods(vertex_router) def test_is_bedrock_agent_runtime_route(): diff --git a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py index 6e8296876a..8e016b68d0 100644 --- a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py +++ 
b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py @@ -11,6 +11,7 @@ from unittest.mock import patch from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( PassthroughEndpointRouter, ) +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials passthrough_endpoint_router = PassthroughEndpointRouter() @@ -132,3 +133,185 @@ class TestPassthroughEndpointRouter(unittest.TestCase): ), "COHERE_API_KEY", ) + + def test_get_deployment_key(self): + """Test _get_deployment_key with various inputs""" + router = PassthroughEndpointRouter() + + # Test with valid inputs + key = router._get_deployment_key("test-project", "us-central1") + assert key == "test-project-us-central1" + + # Test with None values + key = router._get_deployment_key(None, "us-central1") + assert key is None + + key = router._get_deployment_key("test-project", None) + assert key is None + + key = router._get_deployment_key(None, None) + assert key is None + + def test_add_vertex_credentials(self): + """Test add_vertex_credentials functionality""" + router = PassthroughEndpointRouter() + + # Test adding valid credentials + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + assert "test-project-us-central1" in router.deployment_key_to_vertex_credentials + creds = router.deployment_key_to_vertex_credentials["test-project-us-central1"] + assert creds.vertex_project == "test-project" + assert creds.vertex_location == "us-central1" + assert creds.vertex_credentials == '{"credentials": "test-creds"}' + + # Test adding with None values + router.add_vertex_credentials( + project_id=None, + location=None, + vertex_credentials='{"credentials": "test-creds"}', + ) + # Should not add None values + assert len(router.deployment_key_to_vertex_credentials) == 1 + + def test_default_credentials(self): + """ + Test get_vertex_credentials with stored credentials. + + Tests if default credentials are used if set. 
+ + Tests if no default credentials are used, if no default set + """ + router = PassthroughEndpointRouter() + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + creds = router.get_vertex_credentials( + project_id="test-project", location="us-central2" + ) + + assert creds is None + + def test_get_vertex_env_vars(self): + """Test that _get_vertex_env_vars correctly reads environment variables""" + # Set environment variables for the test + os.environ["DEFAULT_VERTEXAI_PROJECT"] = "test-project-123" + os.environ["DEFAULT_VERTEXAI_LOCATION"] = "us-central1" + os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/creds" + + try: + result = self.router._get_vertex_env_vars() + print(result) + + # Verify the result + assert isinstance(result, VertexPassThroughCredentials) + assert result.vertex_project == "test-project-123" + assert result.vertex_location == "us-central1" + assert result.vertex_credentials == "/path/to/creds" + + finally: + # Clean up environment variables + del os.environ["DEFAULT_VERTEXAI_PROJECT"] + del os.environ["DEFAULT_VERTEXAI_LOCATION"] + del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] + + def test_set_default_vertex_config(self): + """Test set_default_vertex_config with various inputs""" + # Test with None config - set environment variables first + os.environ["DEFAULT_VERTEXAI_PROJECT"] = "env-project" + os.environ["DEFAULT_VERTEXAI_LOCATION"] = "env-location" + os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "env-creds" + os.environ["GOOGLE_CREDS"] = "secret-creds" + + try: + # Test with None config + self.router.set_default_vertex_config() + + assert self.router.default_vertex_config.vertex_project == "env-project" + assert self.router.default_vertex_config.vertex_location == "env-location" + assert self.router.default_vertex_config.vertex_credentials == "env-creds" + + # Test with valid config.yaml settings on vertex_config + test_config = { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "path/to/creds", + } + self.router.set_default_vertex_config(test_config) + + assert self.router.default_vertex_config.vertex_project == "my-project-123" + assert self.router.default_vertex_config.vertex_location == "us-central1" + assert ( + self.router.default_vertex_config.vertex_credentials == "path/to/creds" + ) + + # Test with environment variable reference + test_config = { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "os.environ/GOOGLE_CREDS", + } + self.router.set_default_vertex_config(test_config) + + assert ( + self.router.default_vertex_config.vertex_credentials == "secret-creds" + ) + + finally: + # Clean up environment variables + del os.environ["DEFAULT_VERTEXAI_PROJECT"] + del os.environ["DEFAULT_VERTEXAI_LOCATION"] + del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] + del os.environ["GOOGLE_CREDS"] + + def test_vertex_passthrough_router_init(self): + """Test VertexPassThroughRouter initialization""" + router = PassthroughEndpointRouter() + assert isinstance(router.deployment_key_to_vertex_credentials, dict) + assert len(router.deployment_key_to_vertex_credentials) == 0 + + def test_get_vertex_credentials_none(self): + """Test get_vertex_credentials with various inputs""" + router = PassthroughEndpointRouter() + + router.set_default_vertex_config( + config={ + "vertex_project": None, + "vertex_location": None, + "vertex_credentials": 
None, + } + ) + + # Test with None project_id and location - should return default config + creds = router.get_vertex_credentials(None, None) + assert isinstance(creds, VertexPassThroughCredentials) + + # Test with valid project_id and location but no stored credentials + creds = router.get_vertex_credentials("test-project", "us-central1") + assert isinstance(creds, VertexPassThroughCredentials) + assert creds.vertex_project is None + assert creds.vertex_location is None + assert creds.vertex_credentials is None + + def test_get_vertex_credentials_stored(self): + """Test get_vertex_credentials with stored credentials""" + router = PassthroughEndpointRouter() + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + creds = router.get_vertex_credentials( + project_id="test-project", location="us-central1" + ) + assert creds.vertex_project == "test-project" + assert creds.vertex_location == "us-central1" + assert creds.vertex_credentials == '{"credentials": "test-creds"}' diff --git a/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py b/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py deleted file mode 100644 index ba5dfa33a8..0000000000 --- a/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py +++ /dev/null @@ -1,294 +0,0 @@ -import json -import os -import sys -from datetime import datetime -from unittest.mock import AsyncMock, Mock, patch - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system-path - - -import httpx -import pytest -import litellm -from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj - - -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - get_litellm_virtual_key, - vertex_proxy_route, - _get_vertex_env_vars, - set_default_vertex_config, - VertexPassThroughCredentials, - default_vertex_config, -) -from litellm.proxy.vertex_ai_endpoints.vertex_passthrough_router import ( - VertexPassThroughRouter, -) - - -@pytest.mark.asyncio -async def test_get_litellm_virtual_key(): - """ - Test that the get_litellm_virtual_key function correctly handles the API key authentication - """ - # Test with x-litellm-api-key - mock_request = Mock() - mock_request.headers = {"x-litellm-api-key": "test-key-123"} - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer test-key-123" - - # Test with Authorization header - mock_request.headers = {"Authorization": "Bearer auth-key-456"} - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer auth-key-456" - - # Test with both headers (x-litellm-api-key should take precedence) - mock_request.headers = { - "x-litellm-api-key": "test-key-123", - "Authorization": "Bearer auth-key-456", - } - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer test-key-123" - - -@pytest.mark.asyncio -async def test_async_vertex_proxy_route_api_key_auth(): - """ - Critical - - This is how Vertex AI JS SDK will Auth to Litellm Proxy - """ - # Mock dependencies - mock_request = Mock() - mock_request.headers = {"x-litellm-api-key": "test-key-123"} - mock_request.method = "POST" - mock_response = Mock() - - with patch( - "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.user_api_key_auth" - ) as mock_auth: - mock_auth.return_value = {"api_key": "test-key-123"} - - with patch( - "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.create_pass_through_route" - ) as mock_pass_through: - 
mock_pass_through.return_value = AsyncMock( - return_value={"status": "success"} - ) - - # Call the function - result = await vertex_proxy_route( - endpoint="v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent", - request=mock_request, - fastapi_response=mock_response, - ) - - # Verify user_api_key_auth was called with the correct Bearer token - mock_auth.assert_called_once() - call_args = mock_auth.call_args[1] - assert call_args["api_key"] == "Bearer test-key-123" - - -@pytest.mark.asyncio -async def test_get_vertex_env_vars(): - """Test that _get_vertex_env_vars correctly reads environment variables""" - # Set environment variables for the test - os.environ["DEFAULT_VERTEXAI_PROJECT"] = "test-project-123" - os.environ["DEFAULT_VERTEXAI_LOCATION"] = "us-central1" - os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/creds" - - try: - result = _get_vertex_env_vars() - print(result) - - # Verify the result - assert isinstance(result, VertexPassThroughCredentials) - assert result.vertex_project == "test-project-123" - assert result.vertex_location == "us-central1" - assert result.vertex_credentials == "/path/to/creds" - - finally: - # Clean up environment variables - del os.environ["DEFAULT_VERTEXAI_PROJECT"] - del os.environ["DEFAULT_VERTEXAI_LOCATION"] - del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] - - -@pytest.mark.asyncio -async def test_set_default_vertex_config(): - """Test set_default_vertex_config with various inputs""" - # Test with None config - set environment variables first - os.environ["DEFAULT_VERTEXAI_PROJECT"] = "env-project" - os.environ["DEFAULT_VERTEXAI_LOCATION"] = "env-location" - os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "env-creds" - os.environ["GOOGLE_CREDS"] = "secret-creds" - - try: - # Test with None config - set_default_vertex_config() - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_project == "env-project" - assert default_vertex_config.vertex_location == "env-location" - assert default_vertex_config.vertex_credentials == "env-creds" - - # Test with valid config.yaml settings on vertex_config - test_config = { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "path/to/creds", - } - set_default_vertex_config(test_config) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_project == "my-project-123" - assert default_vertex_config.vertex_location == "us-central1" - assert default_vertex_config.vertex_credentials == "path/to/creds" - - # Test with environment variable reference - test_config = { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "os.environ/GOOGLE_CREDS", - } - set_default_vertex_config(test_config) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_credentials == "secret-creds" - - finally: - # Clean up environment variables - del os.environ["DEFAULT_VERTEXAI_PROJECT"] - del os.environ["DEFAULT_VERTEXAI_LOCATION"] - del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] - del os.environ["GOOGLE_CREDS"] - - -@pytest.mark.asyncio -async def test_vertex_passthrough_router_init(): - """Test VertexPassThroughRouter initialization""" - router = VertexPassThroughRouter() - assert 
isinstance(router.deployment_key_to_vertex_credentials, dict) - assert len(router.deployment_key_to_vertex_credentials) == 0 - - -@pytest.mark.asyncio -async def test_get_vertex_credentials_none(): - """Test get_vertex_credentials with various inputs""" - from litellm.proxy.vertex_ai_endpoints import vertex_endpoints - - setattr(vertex_endpoints, "default_vertex_config", VertexPassThroughCredentials()) - router = VertexPassThroughRouter() - - # Test with None project_id and location - should return default config - creds = router.get_vertex_credentials(None, None) - assert isinstance(creds, VertexPassThroughCredentials) - - # Test with valid project_id and location but no stored credentials - creds = router.get_vertex_credentials("test-project", "us-central1") - assert isinstance(creds, VertexPassThroughCredentials) - assert creds.vertex_project is None - assert creds.vertex_location is None - assert creds.vertex_credentials is None - - -@pytest.mark.asyncio -async def test_get_vertex_credentials_stored(): - """Test get_vertex_credentials with stored credentials""" - router = VertexPassThroughRouter() - router.add_vertex_credentials( - project_id="test-project", - location="us-central1", - vertex_credentials='{"credentials": "test-creds"}', - ) - - creds = router.get_vertex_credentials( - project_id="test-project", location="us-central1" - ) - assert creds.vertex_project == "test-project" - assert creds.vertex_location == "us-central1" - assert creds.vertex_credentials == '{"credentials": "test-creds"}' - - -@pytest.mark.asyncio -async def test_add_vertex_credentials(): - """Test add_vertex_credentials functionality""" - router = VertexPassThroughRouter() - - # Test adding valid credentials - router.add_vertex_credentials( - project_id="test-project", - location="us-central1", - vertex_credentials='{"credentials": "test-creds"}', - ) - - assert "test-project-us-central1" in router.deployment_key_to_vertex_credentials - creds = router.deployment_key_to_vertex_credentials["test-project-us-central1"] - assert creds.vertex_project == "test-project" - assert creds.vertex_location == "us-central1" - assert creds.vertex_credentials == '{"credentials": "test-creds"}' - - # Test adding with None values - router.add_vertex_credentials( - project_id=None, - location=None, - vertex_credentials='{"credentials": "test-creds"}', - ) - # Should not add None values - assert len(router.deployment_key_to_vertex_credentials) == 1 - - -@pytest.mark.asyncio -async def test_get_deployment_key(): - """Test _get_deployment_key with various inputs""" - router = VertexPassThroughRouter() - - # Test with valid inputs - key = router._get_deployment_key("test-project", "us-central1") - assert key == "test-project-us-central1" - - # Test with None values - key = router._get_deployment_key(None, "us-central1") - assert key is None - - key = router._get_deployment_key("test-project", None) - assert key is None - - key = router._get_deployment_key(None, None) - assert key is None - - -@pytest.mark.asyncio -async def test_get_vertex_project_id_from_url(): - """Test _get_vertex_project_id_from_url with various URLs""" - # Test with valid URL - url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" - project_id = VertexPassThroughRouter._get_vertex_project_id_from_url(url) - assert project_id == "test-project" - - # Test with invalid URL - url = "https://invalid-url.com" - project_id = 
VertexPassThroughRouter._get_vertex_project_id_from_url(url) - assert project_id is None - - -@pytest.mark.asyncio -async def test_get_vertex_location_from_url(): - """Test _get_vertex_location_from_url with various URLs""" - # Test with valid URL - url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" - location = VertexPassThroughRouter._get_vertex_location_from_url(url) - assert location == "us-central1" - - # Test with invalid URL - url = "https://invalid-url.com" - location = VertexPassThroughRouter._get_vertex_location_from_url(url) - assert location is None diff --git a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py index 718f707755..937eb6f298 100644 --- a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py +++ b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py @@ -30,9 +30,6 @@ from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles, UserAPIKey from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - router as vertex_router, -) # Replace the actual hash_token function with our mock import litellm.proxy.auth.route_checks @@ -96,7 +93,7 @@ def test_is_llm_api_route(): assert RouteChecks.is_llm_api_route("/key/regenerate/82akk800000000jjsk") is False assert RouteChecks.is_llm_api_route("/key/82akk800000000jjsk/delete") is False - all_llm_api_routes = vertex_router.routes + llm_passthrough_router.routes + all_llm_api_routes = llm_passthrough_router.routes # check all routes in llm_passthrough_router, ensure they are considered llm api routes for route in all_llm_api_routes: @@ -165,7 +162,6 @@ def test_llm_api_route(route_checks): route="/v1/chat/completions", request=MockRequest(), valid_token=UserAPIKeyAuth(api_key="test_key"), - api_key="test_key", request_data={}, ) is None @@ -183,7 +179,6 @@ def test_key_info_route_allowed(route_checks): route="/key/info", request=MockRequest(query_params={"key": "test_key"}), valid_token=UserAPIKeyAuth(api_key="test_key"), - api_key="test_key", request_data={}, ) is None @@ -201,7 +196,6 @@ def test_user_info_route_allowed(route_checks): route="/user/info", request=MockRequest(query_params={"user_id": "test_user"}), valid_token=UserAPIKeyAuth(api_key="test_key", user_id="test_user"), - api_key="test_key", request_data={}, ) is None @@ -219,7 +213,6 @@ def test_user_info_route_forbidden(route_checks): route="/user/info", request=MockRequest(query_params={"user_id": "wrong_user"}), valid_token=UserAPIKeyAuth(api_key="test_key", user_id="test_user"), - api_key="test_key", request_data={}, ) assert exc_info.value.status_code == 403 diff --git a/tests/proxy_unit_tests/test_user_api_key_auth.py b/tests/proxy_unit_tests/test_user_api_key_auth.py index e956a22282..f0ca27c946 100644 --- a/tests/proxy_unit_tests/test_user_api_key_auth.py +++ b/tests/proxy_unit_tests/test_user_api_key_auth.py @@ -4,6 +4,9 @@ import os import sys +import litellm.proxy +import litellm.proxy.proxy_server + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path @@ -329,7 +332,7 @@ async def test_auth_with_allowed_routes(route, should_raise_error): ], ) def test_is_ui_route_allowed(route, user_role, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_ui_route + from litellm.proxy.auth.auth_checks 
import _is_ui_route from litellm.proxy._types import LiteLLM_UserTable user_obj = LiteLLM_UserTable( @@ -367,7 +370,7 @@ def test_is_ui_route_allowed(route, user_role, expected_result): ], ) def test_is_api_route_allowed(route, user_role, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_api_route_allowed + from litellm.proxy.auth.auth_checks import _is_api_route_allowed from litellm.proxy._types import LiteLLM_UserTable user_obj = LiteLLM_UserTable( @@ -635,7 +638,7 @@ async def test_soft_budget_alert(): def test_is_allowed_route(): - from litellm.proxy.auth.user_api_key_auth import _is_allowed_route + from litellm.proxy.auth.auth_checks import _is_allowed_route from litellm.proxy._types import UserAPIKeyAuth import datetime @@ -646,7 +649,6 @@ def test_is_allowed_route(): "token_type": "api", "request": request, "request_data": {"input": ["hello world"], "model": "embedding-small"}, - "api_key": "9644159bc181998825c44c788b1526341ed2e825d1b6f562e23173759e14bb86", "valid_token": UserAPIKeyAuth( token="9644159bc181998825c44c788b1526341ed2e825d1b6f562e23173759e14bb86", key_name="sk-...CJjQ", @@ -734,7 +736,7 @@ def test_is_allowed_route(): ], ) def test_is_user_proxy_admin(user_obj, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_user_proxy_admin + from litellm.proxy.auth.auth_checks import _is_user_proxy_admin assert _is_user_proxy_admin(user_obj) == expected_result @@ -947,3 +949,53 @@ def test_get_model_from_request(route, request_data, expected_model): from litellm.proxy.auth.user_api_key_auth import get_model_from_request assert get_model_from_request(request_data, route) == expected_model + + +@pytest.mark.asyncio +async def test_jwt_non_admin_team_route_access(monkeypatch): + """ + Test that a non-admin JWT user cannot access team management routes + """ + from fastapi import Request, HTTPException + from starlette.datastructures import URL + from unittest.mock import patch + from litellm.proxy.auth.user_api_key_auth import user_api_key_auth + import json + from litellm.proxy._types import ProxyException + + mock_jwt_response = { + "is_proxy_admin": False, + "team_id": None, + "team_object": None, + "user_id": None, + "user_object": None, + "org_id": None, + "org_object": None, + "end_user_id": None, + "end_user_object": None, + "token": 
"eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJmR09YQTNhbHFObjByRzJ6OHJQT1FLZVVMSWxCNDFnVWl4VDJ5WE1QVG1ZIn0.eyJleHAiOjE3NDI2MDAzODIsImlhdCI6MTc0MjYwMDA4MiwianRpIjoiODRhNjZmZjAtMTE5OC00YmRkLTk1NzAtNWZhMjNhZjYxMmQyIiwiaXNzIjoiaHR0cDovL2xvY2FsaG9zdDo4MDgwL3JlYWxtcy9saXRlbGxtLXJlYWxtIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6ImZmMGZjOGNiLWUyMjktNDkyYy05NzYwLWNlYzVhMDYxNmI2MyIsInR5cCI6IkJlYXJlciIsImF6cCI6ImxpdGVsbG0tdGVzdC1jbGllbnQtaWQiLCJzaWQiOiI4MTYwNjIxOC0yNmZmLTQwMjAtOWQxNy05Zjc0YmFlNTBkODUiLCJhY3IiOiIxIiwiYWxsb3dlZC1vcmlnaW5zIjpbImh0dHA6Ly9sb2NhbGhvc3Q6NDAwMC8qIl0sInJlYWxtX2FjY2VzcyI6eyJyb2xlcyI6WyJvZmZsaW5lX2FjY2VzcyIsImRlZmF1bHQtcm9sZXMtbGl0ZWxsbS1yZWFsbSIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJwcm9maWxlIGdyb3Vwcy1zY29wZSBlbWFpbCBsaXRlbGxtLmFwaS5jb25zdW1lciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJuYW1lIjoiS3Jpc2ggRGhvbGFraWEiLCJncm91cHMiOlsiL28zX21pbmlfYWNjZXNzIl0sInByZWZlcnJlZF91c2VybmFtZSI6ImtycmlzaGRoMiIsImdpdmVuX25hbWUiOiJLcmlzaCIsImZhbWlseV9uYW1lIjoiRGhvbGFraWEiLCJlbWFpbCI6ImtycmlzaGRob2xha2lhMkBnbWFpbC5jb20ifQ.Fu2ErZhnfez-bhn_XmjkywcFdZHcFUSvzIzfdNiEowdA0soLmCyqf9731amP6m68shd9qk11e0mQhxFIAIxZPojViC1Csc9TBXLRRQ8ESMd6gPIj-DBkKVkQSZLJ1uibsh4Oo2RViGtqWVcEt32T8U_xhGdtdzNkJ8qy_e0fdNDsUnhmSaTQvmZJYarW0roIrkC-zYZrX3fftzbQfavSu9eqdfPf6wUttIrkaWThWUuORy-xaeZfSmvsGbEg027hh6QwlChiZTSF8R6bRxoqfPN3ZaGFFgbBXNRYZA_eYi2IevhIwJHi_r4o1UvtKAJyfPefm-M6hCfkN_6da4zsog", + } + + # Create request + request = Request( + scope={"type": "http", "headers": [("Authorization", "Bearer fake.jwt.token")]} + ) + request._url = URL(url="/team/new") + + monkeypatch.setattr( + litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True} + ) + + # Mock JWTAuthManager.auth_builder + with patch( + "litellm.proxy.auth.handle_jwt.JWTAuthManager.auth_builder", + return_value=mock_jwt_response, + ): + try: + await user_api_key_auth(request=request, api_key="Bearer fake.jwt.token") + pytest.fail( + "Expected this call to fail. Non-admin user should not access team routes." 
+ ) + except ProxyException as e: + print("e", e) + assert "Only proxy admin can be used to generate" in str(e.message) diff --git a/tests/router_unit_tests/test_router_adding_deployments.py b/tests/router_unit_tests/test_router_adding_deployments.py index fca3f147e5..53fe7347d3 100644 --- a/tests/router_unit_tests/test_router_adding_deployments.py +++ b/tests/router_unit_tests/test_router_adding_deployments.py @@ -36,11 +36,11 @@ def test_initialize_deployment_for_pass_through_success(): ) # Verify the credentials were properly set - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, ) - vertex_creds = vertex_pass_through_router.get_vertex_credentials( + vertex_creds = passthrough_endpoint_router.get_vertex_credentials( project_id="test-project", location="us-central1" ) assert vertex_creds.vertex_project == "test-project" @@ -123,21 +123,21 @@ def test_add_vertex_pass_through_deployment(): router.add_deployment(deployment) # Get the vertex credentials from the router - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, ) # current state of pass-through vertex router print("\n vertex_pass_through_router.deployment_key_to_vertex_credentials\n\n") print( json.dumps( - vertex_pass_through_router.deployment_key_to_vertex_credentials, + passthrough_endpoint_router.deployment_key_to_vertex_credentials, indent=4, default=str, ) ) - vertex_creds = vertex_pass_through_router.get_vertex_credentials( + vertex_creds = passthrough_endpoint_router.get_vertex_credentials( project_id="test-project", location="us-central1" ) diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json index 307e95217f..39ab75d8c7 100644 --- a/ui/litellm-dashboard/package-lock.json +++ b/ui/litellm-dashboard/package-lock.json @@ -21,7 +21,7 @@ "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "moment": "^2.30.1", - "next": "^14.2.15", + "next": "^14.2.25", "openai": "^4.28.0", "papaparse": "^5.5.2", "react": "^18", @@ -418,9 +418,10 @@ } }, "node_modules/@next/env": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.21.tgz", - "integrity": "sha512-lXcwcJd5oR01tggjWJ6SrNNYFGuOOMB9c251wUNkjCpkoXOPkDeF/15c3mnVlBqrW4JJXb2kVxDFhC4GduJt2A==" + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.25.tgz", + "integrity": "sha512-JnzQ2cExDeG7FxJwqAksZ3aqVJrHjFwZQAEJ9gQZSoEhIow7SNoKZzju/AwQ+PLIR4NY8V0rhcVozx/2izDO0w==", + "license": "MIT" }, "node_modules/@next/eslint-plugin-next": { "version": "14.1.0", @@ -432,12 +433,13 @@ } }, "node_modules/@next/swc-darwin-arm64": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.21.tgz", - "integrity": "sha512-HwEjcKsXtvszXz5q5Z7wCtrHeTTDSTgAbocz45PHMUjU3fBYInfvhR+ZhavDRUYLonm53aHZbB09QtJVJj8T7g==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.25.tgz", + "integrity": "sha512-09clWInF1YRd6le00vt750s3m7SEYNehz9C4PUcSu3bAdCTpjIV4aTYQZ25Ehrr83VR1rZeqtKUPWSI7GfuKZQ==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -447,12 +449,13 @@ } }, "node_modules/@next/swc-darwin-x64": { - "version": "14.2.21", - "resolved": 
"https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.21.tgz", - "integrity": "sha512-TSAA2ROgNzm4FhKbTbyJOBrsREOMVdDIltZ6aZiKvCi/v0UwFmwigBGeqXDA97TFMpR3LNNpw52CbVelkoQBxA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.25.tgz", + "integrity": "sha512-V+iYM/QR+aYeJl3/FWWU/7Ix4b07ovsQ5IbkwgUK29pTHmq+5UxeDr7/dphvtXEq5pLB/PucfcBNh9KZ8vWbug==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -462,12 +465,13 @@ } }, "node_modules/@next/swc-linux-arm64-gnu": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.21.tgz", - "integrity": "sha512-0Dqjn0pEUz3JG+AImpnMMW/m8hRtl1GQCNbO66V1yp6RswSTiKmnHf3pTX6xMdJYSemf3O4Q9ykiL0jymu0TuA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.25.tgz", + "integrity": "sha512-LFnV2899PJZAIEHQ4IMmZIgL0FBieh5keMnriMY1cK7ompR+JUd24xeTtKkcaw8QmxmEdhoE5Mu9dPSuDBgtTg==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -477,12 +481,13 @@ } }, "node_modules/@next/swc-linux-arm64-musl": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.21.tgz", - "integrity": "sha512-Ggfw5qnMXldscVntwnjfaQs5GbBbjioV4B4loP+bjqNEb42fzZlAaK+ldL0jm2CTJga9LynBMhekNfV8W4+HBw==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.25.tgz", + "integrity": "sha512-QC5y5PPTmtqFExcKWKYgUNkHeHE/z3lUsu83di488nyP0ZzQ3Yse2G6TCxz6nNsQwgAx1BehAJTZez+UQxzLfw==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -492,12 +497,13 @@ } }, "node_modules/@next/swc-linux-x64-gnu": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.21.tgz", - "integrity": "sha512-uokj0lubN1WoSa5KKdThVPRffGyiWlm/vCc/cMkWOQHw69Qt0X1o3b2PyLLx8ANqlefILZh1EdfLRz9gVpG6tg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.25.tgz", + "integrity": "sha512-y6/ML4b9eQ2D/56wqatTJN5/JR8/xdObU2Fb1RBidnrr450HLCKr6IJZbPqbv7NXmje61UyxjF5kvSajvjye5w==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -507,12 +513,13 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.21.tgz", - "integrity": "sha512-iAEBPzWNbciah4+0yI4s7Pce6BIoxTQ0AGCkxn/UBuzJFkYyJt71MadYQkjPqCQCJAFQ26sYh7MOKdU+VQFgPg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.25.tgz", + "integrity": "sha512-sPX0TSXHGUOZFvv96GoBXpB3w4emMqKeMgemrSxI7A6l55VBJp/RKYLwZIB9JxSqYPApqiREaIIap+wWq0RU8w==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -522,12 +529,13 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.21.tgz", - "integrity": "sha512-plykgB3vL2hB4Z32W3ktsfqyuyGAPxqwiyrAi2Mr8LlEUhNn9VgkiAl5hODSBpzIfWweX3er1f5uNpGDygfQVQ==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.25.tgz", + "integrity": 
"sha512-ReO9S5hkA1DU2cFCsGoOEp7WJkhFzNbU/3VUF6XxNGUCQChyug6hZdYL/istQgfT/GWE6PNIg9cm784OI4ddxQ==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -537,12 +545,13 @@ } }, "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.21.tgz", - "integrity": "sha512-w5bacz4Vxqrh06BjWgua3Yf7EMDb8iMcVhNrNx8KnJXt8t+Uu0Zg4JHLDL/T7DkTCEEfKXO/Er1fcfWxn2xfPA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.25.tgz", + "integrity": "sha512-DZ/gc0o9neuCDyD5IumyTGHVun2dCox5TfPQI/BJTYwpSNYM3CZDI4i6TOdjeq1JMo+Ug4kPSMuZdwsycwFbAw==", "cpu": [ "ia32" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -552,12 +561,13 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.21.tgz", - "integrity": "sha512-sT6+llIkzpsexGYZq8cjjthRyRGe5cJVhqh12FmlbxHqna6zsDDK8UNaV7g41T6atFHCJUPeLb3uyAwrBwy0NA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.25.tgz", + "integrity": "sha512-KSznmS6eFjQ9RJ1nEc66kJvtGIL1iZMYmGEXsZPh2YtnLtqrgdVvKXJY2ScjjoFnG6nGLyPFR0UiEvDwVah4Tw==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -5001,11 +5011,12 @@ "dev": true }, "node_modules/next": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/next/-/next-14.2.21.tgz", - "integrity": "sha512-rZmLwucLHr3/zfDMYbJXbw0ZeoBpirxkXuvsJbk7UPorvPYZhP7vq7aHbKnU7dQNCYIimRrbB2pp3xmf+wsYUg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.25.tgz", + "integrity": "sha512-N5M7xMc4wSb4IkPvEV5X2BRRXUmhVHNyaXwEM86+voXthSZz8ZiRyQW4p9mwAoAPIm6OzuVZtn7idgEJeAJN3Q==", + "license": "MIT", "dependencies": { - "@next/env": "14.2.21", + "@next/env": "14.2.25", "@swc/helpers": "0.5.5", "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", @@ -5020,15 +5031,15 @@ "node": ">=18.17.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.2.21", - "@next/swc-darwin-x64": "14.2.21", - "@next/swc-linux-arm64-gnu": "14.2.21", - "@next/swc-linux-arm64-musl": "14.2.21", - "@next/swc-linux-x64-gnu": "14.2.21", - "@next/swc-linux-x64-musl": "14.2.21", - "@next/swc-win32-arm64-msvc": "14.2.21", - "@next/swc-win32-ia32-msvc": "14.2.21", - "@next/swc-win32-x64-msvc": "14.2.21" + "@next/swc-darwin-arm64": "14.2.25", + "@next/swc-darwin-x64": "14.2.25", + "@next/swc-linux-arm64-gnu": "14.2.25", + "@next/swc-linux-arm64-musl": "14.2.25", + "@next/swc-linux-x64-gnu": "14.2.25", + "@next/swc-linux-x64-musl": "14.2.25", + "@next/swc-win32-arm64-msvc": "14.2.25", + "@next/swc-win32-ia32-msvc": "14.2.25", + "@next/swc-win32-x64-msvc": "14.2.25" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", diff --git a/ui/litellm-dashboard/package.json b/ui/litellm-dashboard/package.json index 79f096106d..895e2576cc 100644 --- a/ui/litellm-dashboard/package.json +++ b/ui/litellm-dashboard/package.json @@ -22,7 +22,7 @@ "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "moment": "^2.30.1", - "next": "^14.2.15", + "next": "^14.2.25", "openai": "^4.28.0", "papaparse": "^5.5.2", "react": "^18",