diff --git a/.circleci/config.yml b/.circleci/config.yml index 40d498d6e..4fad4111d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -243,7 +243,102 @@ jobs: command: | pwd ls - python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 + python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests + no_output_timeout: 120m + + # Store test results + - store_test_results: + path: test-results + proxy_log_to_otel_tests: + machine: + image: ubuntu-2204:2023.10.1 + resource_class: xlarge + working_directory: ~/project + steps: + - checkout + - run: + name: Install Docker CLI (In case it's not already installed) + command: | + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io + - run: + name: Install Python 3.9 + command: | + curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + export PATH="$HOME/miniconda/bin:$PATH" + conda init bash + source ~/.bashrc + conda create -n myenv python=3.9 -y + conda activate myenv + python --version + - run: + name: Install Dependencies + command: | + pip install "pytest==7.3.1" + pip install "pytest-asyncio==0.21.1" + pip install aiohttp + pip install openai + python -m pip install --upgrade pip + python -m pip install -r .circleci/requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-mock==3.12.0" + pip install "pytest-asyncio==0.21.1" + pip install mypy + pip install pyarrow + pip install numpydoc + pip install prisma + pip install fastapi + pip install jsonschema + pip install "httpx==0.24.1" + pip install "anyio==3.7.1" + pip install "asyncio==3.4.3" + pip install "PyGithub==1.59.1" + - run: + name: Build Docker image + command: docker build -t my-app:latest -f Dockerfile.database . 
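+      # The image built above is started by the next step with OTEL_EXPORTER="in_memory",
+      # which (presumably) lets tests/otel_tests/test_otel.py inspect emitted spans without an external collector.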
+ - run: + name: Run Docker container + # intentionally give bad redis credentials here + # the OTEL test - should get this as a trace + command: | + docker run -d \ + -p 4000:4000 \ + -e DATABASE_URL=$PROXY_DATABASE_URL \ + -e REDIS_HOST=$REDIS_HOST \ + -e REDIS_PASSWORD=$REDIS_PASSWORD \ + -e REDIS_PORT=$REDIS_PORT \ + -e LITELLM_MASTER_KEY="sk-1234" \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LITELLM_LICENSE=$LITELLM_LICENSE \ + -e OTEL_EXPORTER="in_memory" \ + --name my-app \ + -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ + my-app:latest \ + --config /app/config.yaml \ + --port 4000 \ + --detailed_debug \ + - run: + name: Install curl and dockerize + command: | + sudo apt-get update + sudo apt-get install -y curl + sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz + sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz + sudo rm dockerize-linux-amd64-v0.6.1.tar.gz + - run: + name: Start outputting logs + command: docker logs -f my-app + background: true + - run: + name: Wait for app to be ready + command: dockerize -wait http://localhost:4000 -timeout 5m + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/otel_tests/test_otel.py -x --junitxml=test-results/junit.xml --durations=5 no_output_timeout: 120m # Store test results @@ -337,6 +432,12 @@ workflows: only: - main - /litellm_.*/ + - proxy_log_to_otel_tests: + filters: + branches: + only: + - main + - /litellm_.*/ - installing_litellm_on_python: filters: branches: @@ -347,6 +448,7 @@ workflows: requires: - local_testing - build_and_test + - proxy_log_to_otel_tests filters: branches: only: diff --git a/deploy/charts/litellm-helm/index.yaml b/deploy/charts/litellm-helm/index.yaml deleted file mode 100644 index 5c6b75454..000000000 --- a/deploy/charts/litellm-helm/index.yaml +++ /dev/null @@ -1,88 +0,0 @@ -apiVersion: v1 -entries: - postgresql: - - annotations: - category: Database - images: | - - name: os-shell - image: docker.io/bitnami/os-shell:12-debian-12-r16 - - name: postgres-exporter - image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14 - - name: postgresql - image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6 - licenses: Apache-2.0 - apiVersion: v2 - appVersion: 16.2.0 - created: "2024-07-08T11:05:19.312515+08:00" - dependencies: - - name: common - repository: oci://registry-1.docker.io/bitnamicharts - tags: - - bitnami-common - version: 2.x.x - description: PostgreSQL (Postgres) is an open source object-relational database - known for reliability and data integrity. ACID-compliant, it supports foreign - keys, joins, views, triggers and stored procedures. - digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167 - home: https://bitnami.com - icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png - keywords: - - postgresql - - postgres - - database - - sql - - replication - - cluster - maintainers: - - name: VMware, Inc. 
- url: https://github.com/bitnami/charts - name: postgresql - sources: - - https://github.com/bitnami/charts/tree/main/bitnami/postgresql - urls: - - https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz - version: 14.3.1 - redis: - - annotations: - category: Database - images: | - - name: kubectl - image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3 - - name: os-shell - image: docker.io/bitnami/os-shell:12-debian-12-r16 - - name: redis - image: docker.io/bitnami/redis:7.2.4-debian-12-r9 - - name: redis-exporter - image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4 - - name: redis-sentinel - image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7 - licenses: Apache-2.0 - apiVersion: v2 - appVersion: 7.2.4 - created: "2024-07-08T11:05:19.317065+08:00" - dependencies: - - name: common - repository: oci://registry-1.docker.io/bitnamicharts - tags: - - bitnami-common - version: 2.x.x - description: Redis(R) is an open source, advanced key-value store. It is often - referred to as a data structure server since keys can contain strings, hashes, - lists, sets and sorted sets. - digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984 - home: https://bitnami.com - icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png - keywords: - - redis - - keyvalue - - database - maintainers: - - name: VMware, Inc. - url: https://github.com/bitnami/charts - name: redis - sources: - - https://github.com/bitnami/charts/tree/main/bitnami/redis - urls: - - https://berriai.github.io/litellm/charts/redis-18.19.1.tgz - version: 18.19.1 -generated: "2024-07-08T11:05:19.308028+08:00" diff --git a/docs/my-website/docs/anthropic_completion.md b/docs/my-website/docs/anthropic_completion.md new file mode 100644 index 000000000..ca65f3f6f --- /dev/null +++ b/docs/my-website/docs/anthropic_completion.md @@ -0,0 +1,54 @@ +# [BETA] Anthropic `/v1/messages` + +Call 100+ LLMs in the Anthropic format. + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: my-test-model + litellm_params: + model: gpt-3.5-turbo +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/v1/messages' \ +-H 'x-api-key: sk-1234' \ +-H 'content-type: application/json' \ +-D '{ + "model": "my-test-model", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] +}' +``` + +## Test with Anthropic SDK + +```python +import os +from anthropic import Anthropic + +client = Anthropic(api_key="sk-1234", base_url="http://0.0.0.0:4000") # 👈 CONNECT TO PROXY + +message = client.messages.create( + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="my-test-model", # 👈 set 'model_name' +) +print(message.content) +``` \ No newline at end of file diff --git a/docs/my-website/docs/assistants.md b/docs/my-website/docs/assistants.md index cfbc4c11a..fb30a132f 100644 --- a/docs/my-website/docs/assistants.md +++ b/docs/my-website/docs/assistants.md @@ -26,6 +26,7 @@ Call an existing Assistant. - Run the Assistant on the Thread to generate a response by calling the model and the tools. +### SDK + PROXY @@ -281,3 +282,31 @@ curl -X POST 'http://0.0.0.0:4000/threads/{thread_id}/runs' \ ## [👉 Proxy API Reference](https://litellm-api.up.railway.app/#/assistants) + +## OpenAI-Compatible APIs + +To call openai-compatible Assistants API's (eg. 
Astra Assistants API), just add `openai/` to the model name: + + +**config** +```yaml +assistant_settings: + custom_llm_provider: openai + litellm_params: + api_key: os.environ/ASTRA_API_KEY + api_base: os.environ/ASTRA_API_BASE +``` + +**curl** + +```bash +curl -X POST "http://localhost:4000/v1/assistants" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "instructions": "You are a personal math tutor. When asked a question, write and run Python code to answer the question.", + "name": "Math Tutor", + "tools": [{"type": "code_interpreter"}], + "model": "openai/" + }' +``` \ No newline at end of file diff --git a/docs/my-website/docs/data_security.md b/docs/my-website/docs/data_security.md new file mode 100644 index 000000000..b2d32b6e5 --- /dev/null +++ b/docs/my-website/docs/data_security.md @@ -0,0 +1,34 @@ +# Data Privacy and Security + +## Security Measures + +### LiteLLM Cloud + +- We encrypt all data stored using your `LITELLM_MASTER_KEY` and in transit using TLS. +- Our database and application run on GCP, AWS infrastructure, partly managed by NeonDB. + - US data region: Northern California (AWS/GCP `us-west-1`) & Virginia (AWS `us-east-1`) + - EU data region Germany/Frankfurt (AWS/GCP `eu-central-1`) +- All users have access to SSO (Single Sign-On) through OAuth 2.0 with Google, Okta, Microsoft, KeyCloak. +- Audit Logs with retention policy +- Control Allowed IP Addresses that can access your Cloud LiteLLM Instance + +For security inquiries, please contact us at support@berri.ai + +### Supported data regions for LiteLLM Cloud + +LiteLLM supports the following data regions: + +- US, Northern California (AWS/GCP `us-west-1`) +- Europe, Frankfurt, Germany (AWS/GCP `eu-central-1`) + +All data, user accounts, and infrastructure are completely separated between these two regions + +### Security Vulnerability Reporting Guidelines + +We value the security community's role in protecting our systems and users. To report a security vulnerability: + +- Email support@berri.ai with details +- Include steps to reproduce the issue +- Provide any relevant additional information + +We'll review all reports promptly. Note that we don't currently offer a bug bounty program. diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md index 7035b25ce..f33e2dda9 100644 --- a/docs/my-website/docs/enterprise.md +++ b/docs/my-website/docs/enterprise.md @@ -24,6 +24,7 @@ This covers: - ✅ [JWT-Auth](../docs/proxy/token_auth.md) - ✅ [Control available public, private routes](./proxy/enterprise#control-available-public-private-routes) - ✅ [[BETA] AWS Key Manager v2 - Key Decryption](./proxy/enterprise#beta-aws-key-manager---key-decryption) + - ✅ IP address‑based access control lists - ✅ Track Request IP Address - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints) - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests) diff --git a/docs/my-website/docs/hosted.md b/docs/my-website/docs/hosted.md index 92940e858..99bfe9903 100644 --- a/docs/my-website/docs/hosted.md +++ b/docs/my-website/docs/hosted.md @@ -21,6 +21,14 @@ See our status page for [**live reliability**](https://status.litellm.ai/) - **Reliable**: Our hosted proxy is tested on 1k requests per second, making it reliable for high load. 
- **Secure**: LiteLLM is currently undergoing SOC-2 compliance, to make sure your data is as secure as possible. +## Data Privacy & Security + +You can find our [data privacy & security policy for cloud litellm here](../docs/data_security#litellm-cloud) + +## Supported data regions for LiteLLM Cloud + +You can find [supported data regions litellm here](../docs/data_security#supported-data-regions-for-litellm-cloud) + ### Pricing Pricing is based on usage. We can figure out a price that works for your team, on the call. diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index 8aaf36c3e..f30b4b978 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -18,6 +18,7 @@ Features: - ✅ [JWT-Auth](../docs/proxy/token_auth.md) - ✅ [Control available public, private routes](#control-available-public-private-routes) - ✅ [[BETA] AWS Key Manager v2 - Key Decryption](#beta-aws-key-manager---key-decryption) + - ✅ IP address‑based access control lists - ✅ Track Request IP Address - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints) - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests) diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md index c67302e0d..1e2d4945b 100644 --- a/docs/my-website/docs/proxy/health.md +++ b/docs/my-website/docs/proxy/health.md @@ -112,37 +112,52 @@ model_list: mode: completion # 👈 ADD THIS ``` +### Speech to Text Models + +```yaml +model_list: + - model_name: whisper + litellm_params: + model: whisper-1 + api_key: os.environ/OPENAI_API_KEY + model_info: + mode: audio_transcription +``` + + ## `/health/readiness` Unprotected endpoint for checking if proxy is ready to accept requests Example Request: -```bash -curl --location 'http://0.0.0.0:4000/health/readiness' +```bash +curl http://0.0.0.0:4000/health/readiness ``` Example Response: -*If proxy connected to a database* - ```json { - "status": "healthy", - "db": "connected", - "litellm_version":"1.19.2", + "status": "connected", + "db": "connected", + "cache": null, + "litellm_version": "1.40.21", + "success_callbacks": [ + "langfuse", + "_PROXY_track_cost_callback", + "response_taking_too_long_callback", + "_PROXY_MaxParallelRequestsHandler", + "_PROXY_MaxBudgetLimiter", + "_PROXY_CacheControlCheck", + "ServiceLogging" + ], + "last_updated": "2024-07-10T18:59:10.616968" } ``` -*If proxy not connected to a database* - -```json -{ - "status": "healthy", - "db": "Not connected", - "litellm_version":"1.19.2", -} -``` +If the proxy is not connected to a database, then the `"db"` field will be `"Not +connected"` instead of `"connected"` and the `"last_updated"` field will not be present. ## `/health/liveliness` diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index 4ae3ab977..27f1789e0 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -1,27 +1,19 @@ +# 🪢 Logging + +Log Proxy input, output, and exceptions using: + +- Langfuse +- OpenTelemetry +- Custom Callbacks +- DataDog +- DynamoDB +- s3 Bucket +- etc. 
+ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -# 🪢 Logging - Langfuse, OpenTelemetry, Custom Callbacks, DataDog, s3 Bucket, Sentry, Athina, Azure Content-Safety - -Log Proxy Input, Output, Exceptions using Langfuse, OpenTelemetry, Custom Callbacks, DataDog, DynamoDB, s3 Bucket - -## Table of Contents - -- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse) -- [Logging with OpenTelemetry (OpenTelemetry)](#logging-proxy-inputoutput-in-opentelemetry-format) -- [Async Custom Callbacks](#custom-callback-class-async) -- [Async Custom Callback APIs](#custom-callback-apis-async) -- [Logging to Galileo](#logging-llm-io-to-galileo) -- [Logging to OpenMeter](#logging-proxy-inputoutput---langfuse) -- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets) -- [Logging to DataDog](#logging-proxy-inputoutput---datadog) -- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb) -- [Logging to Sentry](#logging-proxy-inputoutput---sentry) -- [Logging to Athina](#logging-proxy-inputoutput-athina) -- [(BETA) Moderation with Azure Content-Safety](#moderation-with-azure-content-safety) - ## Getting the LiteLLM Call ID LiteLLM generates a unique `call_id` for each request. This `call_id` can be @@ -56,6 +48,7 @@ A number of these headers could be useful for troubleshooting, but the components in your system, including in logging tools. ## Logging Proxy Input/Output - Langfuse + We will use the `--config` to set `litellm.success_callback = ["langfuse"]` this will log all successfull LLM calls to langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment **Step 1** Install langfuse @@ -65,6 +58,7 @@ pip install langfuse>=2.0.0 ``` **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -75,6 +69,7 @@ litellm_settings: ``` **Step 3**: Set required env variables for logging to langfuse + ```shell export LANGFUSE_PUBLIC_KEY="pk_kk" export LANGFUSE_SECRET_KEY="sk_ss" @@ -85,11 +80,13 @@ export LANGFUSE_HOST="https://xxx.langfuse.com" **Step 4**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ``` litellm --test ``` @@ -100,7 +97,6 @@ Expected output on Langfuse ### Logging Metadata to Langfuse - @@ -126,6 +122,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ } }' ``` + @@ -159,6 +156,7 @@ response = client.chat.completions.create( print(response) ``` + @@ -201,7 +199,6 @@ print(response) - ### Team based Logging to Langfuse **Example:** @@ -290,6 +287,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ } }' ``` + @@ -320,6 +318,7 @@ response = client.chat.completions.create( print(response) ``` + @@ -365,7 +364,6 @@ You will see `raw_request` in your Langfuse Metadata. 
This is the RAW CURL comma - ## Logging Proxy Input/Output in OpenTelemetry format :::info @@ -381,10 +379,8 @@ OTEL_SERVICE_NAME=` # default="litellm" - - **Step 1:** Set callbacks and env vars Add the following to your env @@ -400,7 +396,6 @@ litellm_settings: callbacks: ["otel"] ``` - **Step 2**: Start the proxy, make a test request Start proxy @@ -460,7 +455,6 @@ This is the Span from OTEL Logging - #### Quick Start - Log to Honeycomb @@ -482,7 +476,6 @@ litellm_settings: callbacks: ["otel"] ``` - **Step 2**: Start the proxy, make a test request Start proxy @@ -507,10 +500,8 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ }' ``` - - #### Quick Start - Log to OTEL Collector @@ -532,7 +523,6 @@ litellm_settings: callbacks: ["otel"] ``` - **Step 2**: Start the proxy, make a test request Start proxy @@ -559,7 +549,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ - #### Quick Start - Log to OTEL GRPC Collector @@ -581,7 +570,6 @@ litellm_settings: callbacks: ["otel"] ``` - **Step 2**: Start the proxy, make a test request Start proxy @@ -606,7 +594,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ }' ``` - @@ -629,7 +616,6 @@ environment_variables: TRACELOOP_API_KEY: "XXXXX" ``` - **Step 3**: Start the proxy, make a test request Start proxy @@ -665,11 +651,15 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ ❓ Use this when you want to **pass information about the incoming request in a distributed tracing system** ✅ Key change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header) + ```curl traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01 ``` + Example Usage + 1. Make Request to LiteLLM Proxy with `traceparent` header + ```python import openai import uuid @@ -693,7 +683,6 @@ response = client.chat.completions.create( ) print(response) - ``` ```shell @@ -707,12 +696,12 @@ Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector - - ## Custom Callback Class [Async] + Use this when you want to run custom callbacks in `python` #### Step 1 - Create your custom `litellm` callback class + We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)** Define your custom callback class in a python file. @@ -815,16 +804,17 @@ proxy_handler_instance = MyCustomHandler() ``` #### Step 2 - Pass your custom callback class in `config.yaml` + We pass the custom callback class defined in **Step1** to the config.yaml. Set `callbacks` to `python_filename.logger_instance_name` In the config below, we pass + - python_filename: `custom_callbacks.py` - logger_instance_name: `proxy_handler_instance`. 
This is defined in Step 1 `callbacks: custom_callbacks.proxy_handler_instance` - ```yaml model_list: - model_name: gpt-3.5-turbo @@ -837,6 +827,7 @@ litellm_settings: ``` #### Step 3 - Start proxy + test request + ```shell litellm --config proxy_config.yaml ``` @@ -858,6 +849,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ ``` #### Resulting Log on Proxy + ```shell On Success Model: gpt-3.5-turbo, @@ -910,7 +902,6 @@ class MyCustomHandler(CustomLogger): "max_tokens": 10 } } - ``` #### Logging `model_info` set in config.yaml @@ -928,11 +919,13 @@ class MyCustomHandler(CustomLogger): ``` **Expected Output** + ```json {'mode': 'embedding', 'input_cost_per_token': 0.002} ``` ### Logging responses from proxy + Both `/chat/completions` and `/embeddings` responses are available as `response_obj` **Note: for `/chat/completions`, both `stream=True` and `non stream` responses are available as `response_obj`** @@ -946,6 +939,7 @@ class MyCustomHandler(CustomLogger): ``` **Expected Output /chat/completion [for both `stream` and `non-stream` responses]** + ```json ModelResponse( id='chatcmpl-8Tfu8GoMElwOZuj2JlHBhNHG01PPo', @@ -972,6 +966,7 @@ ModelResponse( ``` **Expected Output /embeddings** + ```json { 'model': 'ada', @@ -991,7 +986,6 @@ ModelResponse( } ``` - ## Custom Callback APIs [Async] :::info @@ -1001,10 +995,12 @@ This is an Enterprise only feature [Get Started with Enterprise here](https://gi ::: Use this if you: + - Want to use custom callbacks written in a non Python programming language - Want your callbacks to run on a different microservice #### Step 1. Create your generic logging API endpoint + Set up a generic API endpoint that can receive data in JSON format. The data will be included within a "data" field. Your server should support the following Request format: @@ -1067,11 +1063,8 @@ async def log_event(request: Request): if __name__ == "__main__": import uvicorn uvicorn.run(app, host="127.0.0.1", port=4000) - - ``` - #### Step 2. Set your `GENERIC_LOGGER_ENDPOINT` to the endpoint + route we should send callback logs to ```shell @@ -1081,6 +1074,7 @@ os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:4000/log-event" #### Step 3. Create a `config.yaml` file and set `litellm_settings`: `success_callback` = ["generic"] Example litellm proxy config.yaml + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1092,8 +1086,8 @@ litellm_settings: Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API - ## Logging LLM IO to Galileo + [BETA] Log LLM I/O on [www.rungalileo.io](https://www.rungalileo.io/) @@ -1116,6 +1110,7 @@ export GALILEO_PASSWORD="" ### Quick Start 1. Add to Config.yaml + ```yaml model_list: - litellm_params: @@ -1151,7 +1146,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ ' ``` - 🎉 That's it - Expect to see your Logs on your Galileo Dashboard ## Logging Proxy Cost + Usage - OpenMeter @@ -1169,6 +1163,7 @@ export OPENMETER_API_KEY="" ### Quick Start 1. 
Add to Config.yaml + ```yaml model_list: - litellm_params: @@ -1204,13 +1199,14 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ ' ``` - ## Logging Proxy Input/Output - DataDog + We will use the `--config` to set `litellm.success_callback = ["datadog"]` this will log all successfull LLM calls to DataDog **Step 1**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1230,6 +1226,7 @@ DD_SITE="us5.datadoghq.com" # your datadog base url **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` @@ -1257,10 +1254,10 @@ Expected output on Datadog - ## Logging Proxy Input/Output - s3 Buckets We will use the `--config` to set + - `litellm.success_callback = ["s3"]` This will log all successfull LLM calls to s3 Bucket @@ -1274,6 +1271,7 @@ AWS_REGION_NAME = "" ``` **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1293,11 +1291,13 @@ litellm_settings: **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ```shell curl --location 'http://0.0.0.0:4000/chat/completions' \ --header 'Content-Type: application/json' \ @@ -1317,6 +1317,7 @@ Your logs should be available on the specified s3 Bucket ## Logging Proxy Input/Output - DynamoDB We will use the `--config` to set + - `litellm.success_callback = ["dynamodb"]` - `litellm.dynamodb_table_name = "your-table-name"` @@ -1331,6 +1332,7 @@ AWS_REGION_NAME = "" ``` **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1344,11 +1346,13 @@ litellm_settings: **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ```shell curl --location 'http://0.0.0.0:4000/chat/completions' \ --header 'Content-Type: application/json' \ @@ -1436,19 +1440,18 @@ Your logs should be available on DynamoDB } ``` - - - ## Logging Proxy Input/Output - Sentry If api calls fail (llm/database) you can log those to Sentry: **Step 1** Install Sentry + ```shell pip install --upgrade sentry-sdk ``` **Step 2**: Save your Sentry_DSN and add `litellm_settings`: `failure_callback` + ```shell export SENTRY_DSN="your-sentry-dsn" ``` @@ -1468,11 +1471,13 @@ general_settings: **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ``` litellm --test ``` @@ -1490,6 +1495,7 @@ ATHINA_API_KEY = "your-athina-api-key" ``` **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1502,11 +1508,13 @@ litellm_settings: **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ``` curl --location 'http://0.0.0.0:4000/chat/completions' \ --header 'Content-Type: application/json' \ @@ -1538,6 +1546,7 @@ AZURE_CONTENT_SAFETY_KEY = "" ``` **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` + ```yaml model_list: - model_name: gpt-3.5-turbo @@ -1553,11 +1562,13 @@ litellm_settings: **Step 3**: Start the proxy, make a test request Start proxy + ```shell litellm --config config.yaml --debug ``` Test Request + ``` curl --location 'http://0.0.0.0:4000/chat/completions' \ --header 'Content-Type: 
application/json' \ @@ -1573,7 +1584,8 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ ``` An HTTP 400 error will be returned if the content is detected with a value greater than the threshold set in the `config.yaml`. -The details of the response will describe : +The details of the response will describe: + - The `source` : input text or llm generated text - The `category` : the category of the content that triggered the moderation - The `severity` : the severity from 0 to 10 diff --git a/docs/my-website/docs/proxy/model_management.md b/docs/my-website/docs/proxy/model_management.md index 61d7e0882..02ce4ba23 100644 --- a/docs/my-website/docs/proxy/model_management.md +++ b/docs/my-website/docs/proxy/model_management.md @@ -15,9 +15,9 @@ model_list: metadata: "here's additional metadata on the model" # returned via GET /model/info ``` -## Get Model Information +## Get Model Information - `/model/info` -Retrieve detailed information about each model listed in the `/models` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes. +Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes. =13.3.0' + - condition: redis.enabled + name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: '>=18.0.0' + description: Call all LLM APIs using the OpenAI format + digest: eeff5e4e6cebb4c977cb7359c1ec6c773c66982f6aa39dbed94a674890144a43 + name: litellm-helm + type: application + urls: + - https://berriai.github.io/litellm/litellm-helm-0.2.1.tgz + version: 0.2.1 - apiVersion: v2 appVersion: v1.35.38 created: "2024-05-06T10:22:24.384392-07:00" @@ -33,7 +52,7 @@ entries: licenses: Apache-2.0 apiVersion: v2 appVersion: 16.2.0 - created: "2024-05-06T10:22:24.387717-07:00" + created: "2024-07-10T00:59:11.191731+08:00" dependencies: - name: common repository: oci://registry-1.docker.io/bitnamicharts @@ -60,7 +79,7 @@ entries: sources: - https://github.com/bitnami/charts/tree/main/bitnami/postgresql urls: - - charts/postgresql-14.3.1.tgz + - https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz version: 14.3.1 redis: - annotations: @@ -79,7 +98,7 @@ entries: licenses: Apache-2.0 apiVersion: v2 appVersion: 7.2.4 - created: "2024-05-06T10:22:24.391903-07:00" + created: "2024-07-10T00:59:11.195667+08:00" dependencies: - name: common repository: oci://registry-1.docker.io/bitnamicharts @@ -103,6 +122,6 @@ entries: sources: - https://github.com/bitnami/charts/tree/main/bitnami/redis urls: - - charts/redis-18.19.1.tgz + - https://berriai.github.io/litellm/charts/redis-18.19.1.tgz version: 18.19.1 -generated: "2024-05-06T10:22:24.375026-07:00" +generated: "2024-07-10T00:59:11.179952+08:00" diff --git a/litellm-helm-0.2.1.tgz b/litellm-helm-0.2.1.tgz new file mode 100644 index 000000000..acecd9f94 Binary files /dev/null and b/litellm-helm-0.2.1.tgz differ diff --git a/litellm/__init__.py b/litellm/__init__.py index f0dab5e29..1944f886b 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -364,7 +364,7 @@ for key, value in model_cost.items(): elif value.get("litellm_provider") == "mistral": 
mistral_chat_models.append(key) elif value.get("litellm_provider") == "anthropic": - anthropic_models.append(key) + anthropic_models.append(key) elif value.get("litellm_provider") == "empower": empower_models.append(key) elif value.get("litellm_provider") == "openrouter": @@ -789,6 +789,7 @@ from .utils import ( get_api_base, get_first_chars_messages, ModelResponse, + EmbeddingResponse, ImageResponse, get_provider_fields, ) @@ -879,5 +880,11 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * +from .files.main import * from .scheduler import * from .cost_calculator import response_cost_calculator, cost_per_token + +### ADAPTERS ### +from .types.adapter import AdapterItem + +adapters: List[AdapterItem] = [] diff --git a/litellm/adapters/anthropic_adapter.py b/litellm/adapters/anthropic_adapter.py new file mode 100644 index 000000000..7d9d799b6 --- /dev/null +++ b/litellm/adapters/anthropic_adapter.py @@ -0,0 +1,50 @@ +# What is this? +## Translates OpenAI call to Anthropic `/v1/messages` format +import json +import os +import traceback +import uuid +from typing import Literal, Optional + +import dotenv +import httpx +from pydantic import BaseModel + +import litellm +from litellm import ChatCompletionRequest, verbose_logger +from litellm.integrations.custom_logger import CustomLogger +from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse + + +class AnthropicAdapter(CustomLogger): + def __init__(self) -> None: + super().__init__() + + def translate_completion_input_params( + self, kwargs + ) -> Optional[ChatCompletionRequest]: + """ + - translate params, where needed + - pass rest, as is + """ + request_body = AnthropicMessagesRequest(**kwargs) # type: ignore + + translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai( + anthropic_message_request=request_body + ) + + return translated_body + + def translate_completion_output_params( + self, response: litellm.ModelResponse + ) -> Optional[AnthropicResponse]: + + return litellm.AnthropicConfig().translate_openai_response_to_anthropic( + response=response + ) + + def translate_completion_output_params_streaming(self) -> Optional[BaseModel]: + return super().translate_completion_output_params_streaming() + + +anthropic_adapter = AnthropicAdapter() diff --git a/litellm/batches/main.py b/litellm/batches/main.py index 4043606d5..af2dc5059 100644 --- a/litellm/batches/main.py +++ b/litellm/batches/main.py @@ -10,296 +10,37 @@ https://platform.openai.com/docs/api-reference/batch """ -import os import asyncio -from functools import partial import contextvars -from typing import Literal, Optional, Dict, Coroutine, Any, Union +import os +from functools import partial +from typing import Any, Coroutine, Dict, Literal, Optional, Union + import httpx import litellm from litellm import client from litellm.utils import supports_httpx_timeout -from ..types.router import * + from ..llms.openai import OpenAIBatchesAPI, OpenAIFilesAPI from ..types.llms.openai import ( - CreateBatchRequest, - RetrieveBatchRequest, - CancelBatchRequest, - CreateFileRequest, - FileTypes, - FileObject, Batch, + CancelBatchRequest, + CreateBatchRequest, + CreateFileRequest, FileContentRequest, + FileObject, + FileTypes, HttpxBinaryResponseContent, + RetrieveBatchRequest, ) +from ..types.router import * ####### ENVIRONMENT VARIABLES ################### openai_batches_instance = OpenAIBatchesAPI() -openai_files_instance = OpenAIFilesAPI() 
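+# NOTE: openai_files_instance and the file helpers (create_file, file_content, ...)
+# now live in litellm/files/main.py; this module keeps the batch helpers below.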
################################################# -async def acreate_file( - file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], - custom_llm_provider: Literal["openai"] = "openai", - extra_headers: Optional[Dict[str, str]] = None, - extra_body: Optional[Dict[str, str]] = None, - **kwargs, -) -> Coroutine[Any, Any, FileObject]: - """ - Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API. - - LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files - """ - try: - loop = asyncio.get_event_loop() - kwargs["acreate_file"] = True - - # Use a partial function to pass your keyword arguments - func = partial( - create_file, - file, - purpose, - custom_llm_provider, - extra_headers, - extra_body, - **kwargs, - ) - - # Add the context to the function - ctx = contextvars.copy_context() - func_with_context = partial(ctx.run, func) - init_response = await loop.run_in_executor(None, func_with_context) - if asyncio.iscoroutine(init_response): - response = await init_response - else: - response = init_response # type: ignore - - return response - except Exception as e: - raise e - - -def create_file( - file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], - custom_llm_provider: Literal["openai"] = "openai", - extra_headers: Optional[Dict[str, str]] = None, - extra_body: Optional[Dict[str, str]] = None, - **kwargs, -) -> Union[FileObject, Coroutine[Any, Any, FileObject]]: - """ - Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API. - - LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files - """ - try: - optional_params = GenericLiteLLMParams(**kwargs) - if custom_llm_provider == "openai": - # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there - api_base = ( - optional_params.api_base - or litellm.api_base - or os.getenv("OPENAI_API_BASE") - or "https://api.openai.com/v1" - ) - organization = ( - optional_params.organization - or litellm.organization - or os.getenv("OPENAI_ORGANIZATION", None) - or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 - ) - # set API KEY - api_key = ( - optional_params.api_key - or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there - or litellm.openai_key - or os.getenv("OPENAI_API_KEY") - ) - ### TIMEOUT LOGIC ### - timeout = ( - optional_params.timeout or kwargs.get("request_timeout", 600) or 600 - ) - # set timeout for 10 minutes by default - - if ( - timeout is not None - and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) == False - ): - read_timeout = timeout.read or 600 - timeout = read_timeout # default 10 min timeout - elif timeout is not None and not isinstance(timeout, httpx.Timeout): - timeout = float(timeout) # type: ignore - elif timeout is None: - timeout = 600.0 - - _create_file_request = CreateFileRequest( - file=file, - purpose=purpose, - extra_headers=extra_headers, - extra_body=extra_body, - ) - - _is_async = kwargs.pop("acreate_file", False) is True - - response = openai_files_instance.create_file( - _is_async=_is_async, - api_base=api_base, - api_key=api_key, - timeout=timeout, - max_retries=optional_params.max_retries, - organization=organization, - create_file_data=_create_file_request, - ) - else: - raise litellm.exceptions.BadRequestError( - 
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format( - custom_llm_provider - ), - model="n/a", - llm_provider=custom_llm_provider, - response=httpx.Response( - status_code=400, - content="Unsupported provider", - request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - ) - return response - except Exception as e: - raise e - - -async def afile_content( - file_id: str, - custom_llm_provider: Literal["openai"] = "openai", - extra_headers: Optional[Dict[str, str]] = None, - extra_body: Optional[Dict[str, str]] = None, - **kwargs, -) -> Coroutine[Any, Any, HttpxBinaryResponseContent]: - """ - Async: Get file contents - - LiteLLM Equivalent of GET https://api.openai.com/v1/files - """ - try: - loop = asyncio.get_event_loop() - kwargs["afile_content"] = True - - # Use a partial function to pass your keyword arguments - func = partial( - file_content, - file_id, - custom_llm_provider, - extra_headers, - extra_body, - **kwargs, - ) - - # Add the context to the function - ctx = contextvars.copy_context() - func_with_context = partial(ctx.run, func) - init_response = await loop.run_in_executor(None, func_with_context) - if asyncio.iscoroutine(init_response): - response = await init_response - else: - response = init_response # type: ignore - - return response - except Exception as e: - raise e - - -def file_content( - file_id: str, - custom_llm_provider: Literal["openai"] = "openai", - extra_headers: Optional[Dict[str, str]] = None, - extra_body: Optional[Dict[str, str]] = None, - **kwargs, -) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]: - """ - Returns the contents of the specified file. - - LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files - """ - try: - optional_params = GenericLiteLLMParams(**kwargs) - if custom_llm_provider == "openai": - # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there - api_base = ( - optional_params.api_base - or litellm.api_base - or os.getenv("OPENAI_API_BASE") - or "https://api.openai.com/v1" - ) - organization = ( - optional_params.organization - or litellm.organization - or os.getenv("OPENAI_ORGANIZATION", None) - or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 - ) - # set API KEY - api_key = ( - optional_params.api_key - or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there - or litellm.openai_key - or os.getenv("OPENAI_API_KEY") - ) - ### TIMEOUT LOGIC ### - timeout = ( - optional_params.timeout or kwargs.get("request_timeout", 600) or 600 - ) - # set timeout for 10 minutes by default - - if ( - timeout is not None - and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) == False - ): - read_timeout = timeout.read or 600 - timeout = read_timeout # default 10 min timeout - elif timeout is not None and not isinstance(timeout, httpx.Timeout): - timeout = float(timeout) # type: ignore - elif timeout is None: - timeout = 600.0 - - _file_content_request = FileContentRequest( - file_id=file_id, - extra_headers=extra_headers, - extra_body=extra_body, - ) - - _is_async = kwargs.pop("afile_content", False) is True - - response = openai_files_instance.file_content( - _is_async=_is_async, - file_content_request=_file_content_request, - api_base=api_base, - api_key=api_key, - timeout=timeout, - 
max_retries=optional_params.max_retries, - organization=organization, - ) - else: - raise litellm.exceptions.BadRequestError( - message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format( - custom_llm_provider - ), - model="n/a", - llm_provider=custom_llm_provider, - response=httpx.Response( - status_code=400, - content="Unsupported provider", - request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - ) - return response - except Exception as e: - raise e - - async def acreate_batch( completion_window: Literal["24h"], endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 0bc65a7f1..13a9e4bdc 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -15,10 +15,12 @@ from litellm.litellm_core_utils.llm_cost_calc.google import ( from litellm.litellm_core_utils.llm_cost_calc.google import ( cost_per_token as google_cost_per_token, ) +from litellm.litellm_core_utils.llm_cost_calc.google import ( + cost_router as google_cost_router, +) from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS - from litellm.utils import ( CallTypes, CostPerToken, @@ -160,22 +162,32 @@ def cost_per_token( # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models print_verbose(f"Looking up model={model} in model_cost_map") - if custom_llm_provider == "vertex_ai" and "claude" in model: - return google_cost_per_token( - model=model_without_prefix, - custom_llm_provider=custom_llm_provider, - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - ) if custom_llm_provider == "vertex_ai": - return google_cost_per_character( + cost_router = google_cost_router( model=model_without_prefix, custom_llm_provider=custom_llm_provider, prompt_characters=prompt_characters, completion_characters=completion_characters, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, + call_type=call_type, ) + if cost_router == "cost_per_character": + return google_cost_per_character( + model=model_without_prefix, + custom_llm_provider=custom_llm_provider, + prompt_characters=prompt_characters, + completion_characters=completion_characters, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + elif cost_router == "cost_per_token": + return google_cost_per_token( + model=model_without_prefix, + custom_llm_provider=custom_llm_provider, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) elif custom_llm_provider == "gemini": return google_cost_per_token( model=model_without_prefix, diff --git a/litellm/files/main.py b/litellm/files/main.py new file mode 100644 index 000000000..598bc4878 --- /dev/null +++ b/litellm/files/main.py @@ -0,0 +1,659 @@ +""" +Main File for Files API implementation + +https://platform.openai.com/docs/api-reference/files + +""" + +import asyncio +import contextvars +import os +from functools import partial +from typing import Any, Coroutine, Dict, Literal, Optional, Union + +import httpx + +import litellm +from litellm import client +from litellm.llms.openai import FileDeleted, FileObject, OpenAIFilesAPI +from litellm.types.llms.openai import ( + Batch, + CreateFileRequest, + FileContentRequest, + FileTypes, + HttpxBinaryResponseContent, +) +from litellm.types.router 
import * +from litellm.utils import supports_httpx_timeout + +####### ENVIRONMENT VARIABLES ################### +openai_files_instance = OpenAIFilesAPI() +################################################# + + +async def afile_retrieve( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Coroutine[Any, Any, FileObject]: + """ + Async: Get file contents + + LiteLLM Equivalent of GET https://api.openai.com/v1/files + """ + try: + loop = asyncio.get_event_loop() + kwargs["is_async"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + file_retrieve, + file_id, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + + return response + except Exception as e: + raise e + + +def file_retrieve( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> FileObject: + """ + Returns the contents of the specified file. + + LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("is_async", False) is True + + response = openai_files_instance.retrieve_file( + file_id=file_id, + _is_async=_is_async, + api_base=api_base, + api_key=api_key, + timeout=timeout, + max_retries=optional_params.max_retries, + organization=organization, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e + + +# Delete file +async def afile_delete( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Coroutine[Any, Any, FileObject]: + """ + Async: Delete file + + LiteLLM Equivalent of DELETE https://api.openai.com/v1/files + """ + try: + loop = asyncio.get_event_loop() + kwargs["is_async"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + file_delete, + file_id, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + + return response + except Exception as e: + raise e + + +def file_delete( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> FileDeleted: + """ + Delete file + + LiteLLM Equivalent of DELETE https://api.openai.com/v1/files + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("is_async", False) is True + + response = openai_files_instance.delete_file( + file_id=file_id, + _is_async=_is_async, + api_base=api_base, + api_key=api_key, + timeout=timeout, + max_retries=optional_params.max_retries, + organization=organization, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e + + +# List files +async def afile_list( + custom_llm_provider: Literal["openai"] = "openai", + purpose: Optional[str] = None, + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +): + """ + Async: List files + + LiteLLM Equivalent of GET https://api.openai.com/v1/files + """ + try: + loop = asyncio.get_event_loop() + kwargs["is_async"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + file_list, + custom_llm_provider, + purpose, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + + return response + except Exception as e: + raise e + + +def file_list( + custom_llm_provider: Literal["openai"] = "openai", + purpose: Optional[str] = None, + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +): + """ + List files + + LiteLLM Equivalent of GET https://api.openai.com/v1/files + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _is_async = kwargs.pop("is_async", False) is True + + response = openai_files_instance.list_files( + purpose=purpose, + _is_async=_is_async, + api_base=api_base, + api_key=api_key, + timeout=timeout, + max_retries=optional_params.max_retries, + organization=organization, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'file_list'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="file_list", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e + + +async def acreate_file( + file: FileTypes, + purpose: Literal["assistants", "batch", "fine-tune"], + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Coroutine[Any, Any, FileObject]: + """ + Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API. + + LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files + """ + try: + loop = asyncio.get_event_loop() + kwargs["acreate_file"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + create_file, + file, + purpose, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + + return response + except Exception as e: + raise e + + +def create_file( + file: FileTypes, + purpose: Literal["assistants", "batch", "fine-tune"], + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Union[FileObject, Coroutine[Any, Any, FileObject]]: + """ + Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API. 
+ + LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _create_file_request = CreateFileRequest( + file=file, + purpose=purpose, + extra_headers=extra_headers, + extra_body=extra_body, + ) + + _is_async = kwargs.pop("acreate_file", False) is True + + response = openai_files_instance.create_file( + _is_async=_is_async, + api_base=api_base, + api_key=api_key, + timeout=timeout, + max_retries=optional_params.max_retries, + organization=organization, + create_file_data=_create_file_request, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. 
Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e + + +async def afile_content( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Coroutine[Any, Any, HttpxBinaryResponseContent]: + """ + Async: Get file contents + + LiteLLM Equivalent of GET https://api.openai.com/v1/files + """ + try: + loop = asyncio.get_event_loop() + kwargs["afile_content"] = True + + # Use a partial function to pass your keyword arguments + func = partial( + file_content, + file_id, + custom_llm_provider, + extra_headers, + extra_body, + **kwargs, + ) + + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response # type: ignore + + return response + except Exception as e: + raise e + + +def file_content( + file_id: str, + custom_llm_provider: Literal["openai"] = "openai", + extra_headers: Optional[Dict[str, str]] = None, + extra_body: Optional[Dict[str, str]] = None, + **kwargs, +) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]: + """ + Returns the contents of the specified file. + + LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files + """ + try: + optional_params = GenericLiteLLMParams(**kwargs) + if custom_llm_provider == "openai": + # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base = ( + optional_params.api_base + or litellm.api_base + or os.getenv("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) + organization = ( + optional_params.organization + or litellm.organization + or os.getenv("OPENAI_ORGANIZATION", None) + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) + # set API KEY + api_key = ( + optional_params.api_key + or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or os.getenv("OPENAI_API_KEY") + ) + ### TIMEOUT LOGIC ### + timeout = ( + optional_params.timeout or kwargs.get("request_timeout", 600) or 600 + ) + # set timeout for 10 minutes by default + + if ( + timeout is not None + and isinstance(timeout, httpx.Timeout) + and supports_httpx_timeout(custom_llm_provider) == False + ): + read_timeout = timeout.read or 600 + timeout = read_timeout # default 10 min timeout + elif timeout is not None and not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + elif timeout is None: + timeout = 600.0 + + _file_content_request = FileContentRequest( + file_id=file_id, + extra_headers=extra_headers, + extra_body=extra_body, + ) + + _is_async = kwargs.pop("afile_content", False) is True + + response = openai_files_instance.file_content( + _is_async=_is_async, + file_content_request=_file_content_request, + api_base=api_base, + api_key=api_key, + timeout=timeout, + max_retries=optional_params.max_retries, + 
organization=organization, + ) + else: + raise litellm.exceptions.BadRequestError( + message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format( + custom_llm_provider + ), + model="n/a", + llm_provider=custom_llm_provider, + response=httpx.Response( + status_code=400, + content="Unsupported provider", + request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + return response + except Exception as e: + raise e diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index da9826b9b..be0263704 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -5,9 +5,12 @@ import traceback from typing import Literal, Optional, Union import dotenv +from pydantic import BaseModel from litellm.caching import DualCache from litellm.proxy._types import UserAPIKeyAuth +from litellm.types.llms.openai import ChatCompletionRequest +from litellm.types.utils import ModelResponse class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class @@ -55,6 +58,30 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac def pre_call_check(self, deployment: dict) -> Optional[dict]: pass + #### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls + + def translate_completion_input_params( + self, kwargs + ) -> Optional[ChatCompletionRequest]: + """ + Translates the input params, from the provider's native format to the litellm.completion() format. + """ + pass + + def translate_completion_output_params( + self, response: ModelResponse + ) -> Optional[BaseModel]: + """ + Translates the output params, from the OpenAI format to the custom format. + """ + pass + + def translate_completion_output_params_streaming(self) -> Optional[BaseModel]: + """ + Translates the streaming chunk, from the OpenAI format to the custom format. 
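As a rough illustration of how the adapter hooks added to CustomLogger above might be used, here is a hypothetical subclass; MyFormatResponse and its field are invented for this sketch and are not part of litellm:

from typing import Optional

from pydantic import BaseModel

from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.openai import ChatCompletionRequest
from litellm.types.utils import ModelResponse


class MyFormatResponse(BaseModel):
    # Hypothetical custom output schema, invented for this sketch.
    text: str


class MyFormatAdapter(CustomLogger):
    def translate_completion_input_params(self, kwargs) -> Optional[ChatCompletionRequest]:
        # Map the custom request shape onto the openai-style request that litellm.completion() expects.
        return ChatCompletionRequest(model=kwargs["model"], messages=kwargs["messages"])

    def translate_completion_output_params(self, response: ModelResponse) -> Optional[BaseModel]:
        # Map the openai-style ModelResponse back into the custom schema.
        return MyFormatResponse(text=response.choices[0].message.content or "")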
+ """ + pass + #### CALL HOOKS - proxy only #### """ Control the modify incoming / outgoung data before calling the model diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index e7b2f5e0b..a3fa1e2ce 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -326,7 +326,12 @@ class LangFuseLogger: or isinstance(value, int) or isinstance(value, float) ): - new_metadata[key] = copy.deepcopy(value) + try: + new_metadata[key] = copy.deepcopy(value) + except Exception as e: + verbose_logger.error( + f"Langfuse [Non-blocking error] - error copying metadata: {str(e)}" + ) metadata = new_metadata supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index c15161fc7..4ed561116 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -52,6 +52,12 @@ class OpenTelemetryConfig: OTEL_HEADERS gets sent as headers = {"x-honeycomb-team": "B85YgLm96******"} """ + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + + if os.getenv("OTEL_EXPORTER") == "in_memory": + return cls(exporter=InMemorySpanExporter()) return cls( exporter=os.getenv("OTEL_EXPORTER", "console"), endpoint=os.getenv("OTEL_ENDPOINT"), diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 04195705a..8ae23c629 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -675,7 +675,7 @@ class SlackAlerting(CustomLogger): async def failed_tracking_alert(self, error_message: str): """Raise alert when tracking failed for specific model""" _cache: DualCache = self.internal_usage_cache - message = "Failed Tracking Cost for" + error_message + message = "Failed Tracking Cost for " + error_message _cache_key = "budget_alerts:failed_tracking:{}".format(message) result = await _cache.async_get_cache(key=_cache_key) if result is None: @@ -1530,15 +1530,19 @@ Model Info: """Log deployment latency""" try: if "daily_reports" in self.alert_types: - model_id = ( - kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") - ) + litellm_params = kwargs.get("litellm_params", {}) or {} + model_info = litellm_params.get("model_info", {}) or {} + model_id = model_info.get("id", "") or "" response_s: timedelta = end_time - start_time final_value = response_s total_tokens = 0 - if isinstance(response_obj, litellm.ModelResponse): + if isinstance(response_obj, litellm.ModelResponse) and ( + hasattr(response_obj, "usage") + and response_obj.usage is not None + and hasattr(response_obj.usage, "completion_tokens") + ): completion_tokens = response_obj.usage.completion_tokens if completion_tokens is not None and completion_tokens > 0: final_value = float( @@ -1557,8 +1561,7 @@ Model Info: ) except Exception as e: verbose_proxy_logger.error( - "[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: ", - e, + f"[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: {str(e)}" ) pass diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 0271c5714..0edc90325 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1275,7 +1275,7 @@ class Logging: f"Model={self.model}; cost={self.model_call_details['response_cost']}" ) except litellm.NotFoundError as e: - 
verbose_logger.error( + verbose_logger.warning( f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" ) self.model_call_details["response_cost"] = None diff --git a/litellm/litellm_core_utils/llm_cost_calc/google.py b/litellm/litellm_core_utils/llm_cost_calc/google.py index 2c958cf88..76da0da51 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/google.py +++ b/litellm/litellm_core_utils/llm_cost_calc/google.py @@ -1,7 +1,7 @@ # What is this? ## Cost calculation for Google AI Studio / Vertex AI models import traceback -from typing import List, Literal, Optional, Tuple +from typing import List, Literal, Optional, Tuple, Union import litellm from litellm import verbose_logger @@ -29,6 +29,32 @@ def _is_above_128k(tokens: float) -> bool: return False +def cost_router( + model: str, + custom_llm_provider: str, + prompt_tokens: float, + completion_tokens: float, + prompt_characters: float, + completion_characters: float, + call_type: Union[Literal["embedding", "aembedding"], str], +) -> Literal["cost_per_character", "cost_per_token"]: + """ + Route the cost calc to the right place, based on model/call_type/etc. + + Returns + - str, the specific google cost calc function it should route to. + """ + if custom_llm_provider == "vertex_ai" and "claude" in model: + return "cost_per_token" + elif custom_llm_provider == "gemini": + return "cost_per_token" + elif custom_llm_provider == "vertex_ai" and ( + call_type == "embedding" or call_type == "aembedding" + ): + return "cost_per_token" + return "cost_per_character" + + def cost_per_character( model: str, custom_llm_provider: str, diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py index a39a83f15..e65a81099 100644 --- a/litellm/llms/ai21.py +++ b/litellm/llms/ai21.py @@ -1,11 +1,16 @@ -import os, types, traceback import json +import os +import time # type: ignore +import traceback +import types from enum import Enum -import requests # type: ignore -import time, httpx # type: ignore from typing import Callable, Optional -from litellm.utils import ModelResponse, Choices, Message + +import httpx +import requests # type: ignore + import litellm +from litellm.utils import Choices, Message, ModelResponse class AI21Error(Exception): @@ -185,7 +190,7 @@ def completion( message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"] = choices_list + model_response.choices = choices_list # type: ignore except Exception as e: raise AI21Error( message=traceback.format_exc(), status_code=response.status_code @@ -197,13 +202,17 @@ def completion( encoding.encode(model_response["choices"][0]["message"].get("content")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model - model_response["usage"] = { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } + model_response.created = int(time.time()) + model_response.model = model + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), + ) return model_response diff --git a/litellm/llms/aleph_alpha.py b/litellm/llms/aleph_alpha.py index 7edd11964..163e96fde 100644 --- a/litellm/llms/aleph_alpha.py +++ b/litellm/llms/aleph_alpha.py @@ -1,12 +1,15 @@ -import os, types import json -from enum import Enum -import requests # type: ignore +import os import time +import types +from enum import Enum from typing import Callable, Optional 
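A quick sketch of the routing behaviour of the cost_router helper added in llm_cost_calc/google.py above; the token and character counts are arbitrary:

from litellm.litellm_core_utils.llm_cost_calc.google import cost_router

strategy = cost_router(
    model="gemini-1.5-pro",
    custom_llm_provider="gemini",
    prompt_tokens=1000.0,
    completion_tokens=200.0,
    prompt_characters=4000.0,
    completion_characters=800.0,
    call_type="completion",
)
# "gemini" always routes to token-based pricing, so this returns "cost_per_token";
# a plain vertex_ai text call would fall through to "cost_per_character" instead.
assert strategy == "cost_per_token"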
-import litellm -from litellm.utils import ModelResponse, Choices, Message, Usage + import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.utils import Choices, Message, ModelResponse, Usage class AlephAlphaError(Exception): @@ -275,7 +278,7 @@ def completion( message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"] = choices_list + model_response.choices = choices_list # type: ignore except: raise AlephAlphaError( message=json.dumps(completion_response), @@ -291,8 +294,8 @@ def completion( ) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index a4521a703..733cce1e0 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -20,19 +20,43 @@ from litellm.llms.custom_httpx.http_handler import ( _get_httpx_client, ) from litellm.types.llms.anthropic import ( + AnthopicMessagesAssistantMessageParam, + AnthropicFinishReason, + AnthropicMessagesRequest, + AnthropicMessagesTool, AnthropicMessagesToolChoice, + AnthropicMessagesUserMessageParam, + AnthropicResponse, + AnthropicResponseContentBlockText, + AnthropicResponseContentBlockToolUse, + AnthropicResponseUsageBlock, ContentBlockDelta, ContentBlockStart, MessageBlockDelta, MessageStartBlock, ) from litellm.types.llms.openai import ( + AllMessageValues, + ChatCompletionAssistantMessage, + ChatCompletionAssistantToolCall, + ChatCompletionImageObject, + ChatCompletionImageUrlObject, + ChatCompletionRequest, ChatCompletionResponseMessage, + ChatCompletionSystemMessage, + ChatCompletionTextObject, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, + ChatCompletionToolChoiceFunctionParam, + ChatCompletionToolChoiceObjectParam, + ChatCompletionToolChoiceValues, + ChatCompletionToolMessage, + ChatCompletionToolParam, + ChatCompletionToolParamFunctionChunk, ChatCompletionUsageBlock, + ChatCompletionUserMessage, ) -from litellm.types.utils import GenericStreamingChunk +from litellm.types.utils import Choices, GenericStreamingChunk from litellm.utils import CustomStreamWrapper, ModelResponse, Usage from .base import BaseLLM @@ -168,6 +192,287 @@ class AnthropicConfig: optional_params["top_p"] = value return optional_params + ### FOR [BETA] `/v1/messages` endpoint support + + def translatable_anthropic_params(self) -> List: + """ + Which anthropic params, we need to translate to the openai format. 
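To make the translation flow below concrete, a hedged sketch of how a beta /v1/messages payload could be mapped to the openai format via this class; the request payload is hypothetical and the no-argument AnthropicConfig() construction is an assumption:

from litellm.llms.anthropic import AnthropicConfig

anthropic_request = {
    "model": "claude-3-sonnet-20240229",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hello"}],
}

openai_request = AnthropicConfig().translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore
)
# Expected shape, per the logic below: the system prompt becomes the first openai message and
# non-translatable params such as max_tokens are passed through unchanged, e.g.
# {"model": "claude-3-sonnet-20240229",
#  "messages": [{"role": "system", "content": "You are a helpful assistant."},
#               {"role": "user", "content": "Hello"}],
#  "max_tokens": 256}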
+ """ + return ["messages", "metadata", "system", "tool_choice", "tools"] + + def translate_anthropic_messages_to_openai( + self, + messages: List[ + Union[ + AnthropicMessagesUserMessageParam, + AnthopicMessagesAssistantMessageParam, + ] + ], + ) -> List: + new_messages: List[AllMessageValues] = [] + for m in messages: + user_message: Optional[ChatCompletionUserMessage] = None + tool_message_list: List[ChatCompletionToolMessage] = [] + ## USER MESSAGE ## + if m["role"] == "user": + ## translate user message + if isinstance(m["content"], str): + user_message = ChatCompletionUserMessage( + role="user", content=m["content"] + ) + elif isinstance(m["content"], list): + new_user_content_list: List[ + Union[ChatCompletionTextObject, ChatCompletionImageObject] + ] = [] + for content in m["content"]: + if content["type"] == "text": + text_obj = ChatCompletionTextObject( + type="text", text=content["text"] + ) + new_user_content_list.append(text_obj) + elif content["type"] == "image": + image_url = ChatCompletionImageUrlObject( + url=f"data:{content['type']};base64,{content['source']}" + ) + image_obj = ChatCompletionImageObject( + type="image_url", image_url=image_url + ) + + new_user_content_list.append(image_obj) + elif content["type"] == "tool_result": + if "content" not in content: + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content="", + ) + tool_message_list.append(tool_result) + elif isinstance(content["content"], str): + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=content["content"], + ) + tool_message_list.append(tool_result) + elif isinstance(content["content"], list): + for c in content["content"]: + if c["type"] == "text": + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=c["text"], + ) + tool_message_list.append(tool_result) + elif c["type"] == "image": + image_str = ( + f"data:{c['type']};base64,{c['source']}" + ) + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=image_str, + ) + tool_message_list.append(tool_result) + + if user_message is not None: + new_messages.append(user_message) + + if len(tool_message_list) > 0: + new_messages.extend(tool_message_list) + + ## ASSISTANT MESSAGE ## + assistant_message_str: Optional[str] = None + tool_calls: List[ChatCompletionAssistantToolCall] = [] + if m["role"] == "assistant": + if isinstance(m["content"], str): + assistant_message_str = m["content"] + elif isinstance(m["content"], list): + for content in m["content"]: + if content["type"] == "text": + if assistant_message_str is None: + assistant_message_str = content["text"] + else: + assistant_message_str += content["text"] + elif content["type"] == "tool_use": + function_chunk = ChatCompletionToolCallFunctionChunk( + name=content["name"], + arguments=json.dumps(content["input"]), + ) + + tool_calls.append( + ChatCompletionAssistantToolCall( + id=content["id"], + type="function", + function=function_chunk, + ) + ) + + if assistant_message_str is not None or len(tool_calls) > 0: + assistant_message = ChatCompletionAssistantMessage( + role="assistant", + content=assistant_message_str, + ) + if len(tool_calls) > 0: + assistant_message["tool_calls"] = tool_calls + new_messages.append(assistant_message) + + return new_messages + + def translate_anthropic_tool_choice_to_openai( + self, tool_choice: AnthropicMessagesToolChoice + ) -> ChatCompletionToolChoiceValues: + if 
tool_choice["type"] == "any": + return "required" + elif tool_choice["type"] == "auto": + return "auto" + elif tool_choice["type"] == "tool": + tc_function_param = ChatCompletionToolChoiceFunctionParam( + name=tool_choice.get("name", "") + ) + return ChatCompletionToolChoiceObjectParam( + type="function", function=tc_function_param + ) + else: + raise ValueError( + "Incompatible tool choice param submitted - {}".format(tool_choice) + ) + + def translate_anthropic_tools_to_openai( + self, tools: List[AnthropicMessagesTool] + ) -> List[ChatCompletionToolParam]: + new_tools: List[ChatCompletionToolParam] = [] + for tool in tools: + function_chunk = ChatCompletionToolParamFunctionChunk( + name=tool["name"], + parameters=tool["input_schema"], + ) + if "description" in tool: + function_chunk["description"] = tool["description"] + new_tools.append( + ChatCompletionToolParam(type="function", function=function_chunk) + ) + + return new_tools + + def translate_anthropic_to_openai( + self, anthropic_message_request: AnthropicMessagesRequest + ) -> ChatCompletionRequest: + """ + This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format. + """ + new_messages: List[AllMessageValues] = [] + + ## CONVERT ANTHROPIC MESSAGES TO OPENAI + new_messages = self.translate_anthropic_messages_to_openai( + messages=anthropic_message_request["messages"] + ) + ## ADD SYSTEM MESSAGE TO MESSAGES + if "system" in anthropic_message_request: + new_messages.insert( + 0, + ChatCompletionSystemMessage( + role="system", content=anthropic_message_request["system"] + ), + ) + + new_kwargs: ChatCompletionRequest = { + "model": anthropic_message_request["model"], + "messages": new_messages, + } + ## CONVERT METADATA (user_id) + if "metadata" in anthropic_message_request: + if "user_id" in anthropic_message_request["metadata"]: + new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"] + + ## CONVERT TOOL CHOICE + if "tool_choice" in anthropic_message_request: + new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai( + tool_choice=anthropic_message_request["tool_choice"] + ) + ## CONVERT TOOLS + if "tools" in anthropic_message_request: + new_kwargs["tools"] = self.translate_anthropic_tools_to_openai( + tools=anthropic_message_request["tools"] + ) + + translatable_params = self.translatable_anthropic_params() + for k, v in anthropic_message_request.items(): + if k not in translatable_params: # pass remaining params as is + new_kwargs[k] = v # type: ignore + + return new_kwargs + + def _translate_openai_content_to_anthropic( + self, choices: List[Choices] + ) -> List[ + Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] + ]: + new_content: List[ + Union[ + AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse + ] + ] = [] + for choice in choices: + if ( + choice.message.tool_calls is not None + and len(choice.message.tool_calls) > 0 + ): + for tool_call in choice.message.tool_calls: + new_content.append( + AnthropicResponseContentBlockToolUse( + type="tool_use", + id=tool_call.id, + name=tool_call.function.name or "", + input=json.loads(tool_call.function.arguments), + ) + ) + elif choice.message.content is not None: + new_content.append( + AnthropicResponseContentBlockText( + type="text", text=choice.message.content + ) + ) + + return new_content + + def _translate_openai_finish_reason_to_anthropic( + self, openai_finish_reason: str + ) -> AnthropicFinishReason: + if openai_finish_reason == "stop": 
+ return "end_turn" + elif openai_finish_reason == "length": + return "max_tokens" + elif openai_finish_reason == "tool_calls": + return "tool_use" + return "end_turn" + + def translate_openai_response_to_anthropic( + self, response: litellm.ModelResponse + ) -> AnthropicResponse: + ## translate content block + anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore + ## extract finish reason + anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic( + openai_finish_reason=response.choices[0].finish_reason # type: ignore + ) + # extract usage + usage: litellm.Usage = getattr(response, "usage") + anthropic_usage = AnthropicResponseUsageBlock( + input_tokens=usage.prompt_tokens, output_tokens=usage.completion_tokens + ) + translated_obj = AnthropicResponse( + id=response.id, + type="message", + role="assistant", + model=response.model or "unknown-model", + stop_sequence=None, + usage=anthropic_usage, + content=anthropic_content, + stop_reason=anthropic_finish_reason, + ) + + return translated_obj + # makes headers for API call def validate_environment(api_key, user_headers): @@ -231,121 +536,6 @@ class AnthropicChatCompletion(BaseLLM): def __init__(self) -> None: super().__init__() - # def process_streaming_response( - # self, - # model: str, - # response: Union[requests.Response, httpx.Response], - # model_response: ModelResponse, - # stream: bool, - # logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, - # optional_params: dict, - # api_key: str, - # data: Union[dict, str], - # messages: List, - # print_verbose, - # encoding, - # ) -> CustomStreamWrapper: - # """ - # Return stream object for tool-calling + streaming - # """ - # ## LOGGING - # logging_obj.post_call( - # input=messages, - # api_key=api_key, - # original_response=response.text, - # additional_args={"complete_input_dict": data}, - # ) - # print_verbose(f"raw model_response: {response.text}") - # ## RESPONSE OBJECT - # try: - # completion_response = response.json() - # except: - # raise AnthropicError( - # message=response.text, status_code=response.status_code - # ) - # text_content = "" - # tool_calls = [] - # for content in completion_response["content"]: - # if content["type"] == "text": - # text_content += content["text"] - # ## TOOL CALLING - # elif content["type"] == "tool_use": - # tool_calls.append( - # { - # "id": content["id"], - # "type": "function", - # "function": { - # "name": content["name"], - # "arguments": json.dumps(content["input"]), - # }, - # } - # ) - # if "error" in completion_response: - # raise AnthropicError( - # message=str(completion_response["error"]), - # status_code=response.status_code, - # ) - # _message = litellm.Message( - # tool_calls=tool_calls, - # content=text_content or None, - # ) - # model_response.choices[0].message = _message # type: ignore - # model_response._hidden_params["original_response"] = completion_response[ - # "content" - # ] # allow user to access raw anthropic tool calling response - - # model_response.choices[0].finish_reason = map_finish_reason( - # completion_response["stop_reason"] - # ) - - # print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK") - # # return an iterator - # streaming_model_response = ModelResponse(stream=True) - # streaming_model_response.choices[0].finish_reason = model_response.choices[ # type: ignore - # 0 - # ].finish_reason - # # streaming_model_response.choices = [litellm.utils.StreamingChoices()] - # streaming_choice = 
litellm.utils.StreamingChoices() - # streaming_choice.index = model_response.choices[0].index - # _tool_calls = [] - # print_verbose( - # f"type of model_response.choices[0]: {type(model_response.choices[0])}" - # ) - # print_verbose(f"type of streaming_choice: {type(streaming_choice)}") - # if isinstance(model_response.choices[0], litellm.Choices): - # if getattr( - # model_response.choices[0].message, "tool_calls", None - # ) is not None and isinstance( - # model_response.choices[0].message.tool_calls, list - # ): - # for tool_call in model_response.choices[0].message.tool_calls: - # _tool_call = {**tool_call.dict(), "index": 0} - # _tool_calls.append(_tool_call) - # delta_obj = litellm.utils.Delta( - # content=getattr(model_response.choices[0].message, "content", None), - # role=model_response.choices[0].message.role, - # tool_calls=_tool_calls, - # ) - # streaming_choice.delta = delta_obj - # streaming_model_response.choices = [streaming_choice] - # completion_stream = ModelResponseIterator( - # model_response=streaming_model_response - # ) - # print_verbose( - # "Returns anthropic CustomStreamWrapper with 'cached_response' streaming object" - # ) - # return CustomStreamWrapper( - # completion_stream=completion_stream, - # model=model, - # custom_llm_provider="cached_response", - # logging_obj=logging_obj, - # ) - # else: - # raise AnthropicError( - # status_code=422, - # message="Unprocessable response object - {}".format(response.text), - # ) - def process_response( self, model: str, @@ -417,8 +607,8 @@ class AnthropicChatCompletion(BaseLLM): completion_tokens = completion_response["usage"]["output_tokens"] total_tokens = prompt_tokens + completion_tokens - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/anthropic_text.py b/litellm/llms/anthropic_text.py index 0093d9f35..d20e49daf 100644 --- a/litellm/llms/anthropic_text.py +++ b/litellm/llms/anthropic_text.py @@ -1,15 +1,19 @@ -import os, types import json -from enum import Enum -import requests +import os import time +import types +from enum import Enum from typing import Callable, Optional -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper -import litellm -from .prompt_templates.factory import prompt_factory, custom_prompt + import httpx -from .base import BaseLLM +import requests + +import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.utils import CustomStreamWrapper, ModelResponse, Usage + +from .base import BaseLLM +from .prompt_templates.factory import custom_prompt, prompt_factory class AnthropicConstants(Enum): @@ -117,9 +121,9 @@ class AnthropicTextCompletion(BaseLLM): ) else: if len(completion_response["completion"]) > 0: - model_response["choices"][0]["message"]["content"] = ( - completion_response["completion"] - ) + model_response.choices[0].message.content = completion_response[ # type: ignore + "completion" + ] model_response.choices[0].finish_reason = completion_response["stop_reason"] ## CALCULATING USAGE @@ -130,8 +134,8 @@ class AnthropicTextCompletion(BaseLLM): encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) ##[TODO] use the anthropic tokenizer here - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = 
model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/baseten.py b/litellm/llms/baseten.py index 643dae530..d856efc9a 100644 --- a/litellm/llms/baseten.py +++ b/litellm/llms/baseten.py @@ -1,9 +1,11 @@ -import os import json -from enum import Enum -import requests # type: ignore +import os import time +from enum import Enum from typing import Callable + +import requests # type: ignore + from litellm.utils import ModelResponse, Usage @@ -106,28 +108,32 @@ def completion( and "data" in completion_response["model_output"] and isinstance(completion_response["model_output"]["data"], list) ): - model_response["choices"][0]["message"]["content"] = ( - completion_response["model_output"]["data"][0] - ) + model_response.choices[0].message.content = completion_response[ # type: ignore + "model_output" + ][ + "data" + ][ + 0 + ] elif isinstance(completion_response["model_output"], str): - model_response["choices"][0]["message"]["content"] = ( - completion_response["model_output"] - ) + model_response.choices[0].message.content = completion_response[ # type: ignore + "model_output" + ] elif "completion" in completion_response and isinstance( completion_response["completion"], str ): - model_response["choices"][0]["message"]["content"] = ( - completion_response["completion"] - ) + model_response.choices[0].message.content = completion_response[ # type: ignore + "completion" + ] elif isinstance(completion_response, list) and len(completion_response) > 0: if "generated_text" not in completion_response: raise BasetenError( message=f"Unable to parse response. Original response: {response.text}", status_code=response.status_code, ) - model_response["choices"][0]["message"]["content"] = ( - completion_response[0]["generated_text"] - ) + model_response.choices[0].message.content = completion_response[0][ # type: ignore + "generated_text" + ] ## GETTING LOGPROBS if ( "details" in completion_response[0] @@ -139,7 +145,7 @@ def completion( sum_logprob = 0 for token in completion_response[0]["details"]["tokens"]: sum_logprob += token["logprob"] - model_response["choices"][0]["message"]._logprobs = sum_logprob + model_response.choices[0].logprobs = sum_logprob else: raise BasetenError( message=f"Unable to parse response. 
Original response: {response.text}", @@ -152,8 +158,8 @@ def completion( encoding.encode(model_response["choices"][0]["message"]["content"]) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index a8c47b3b9..e12b656ed 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -1122,7 +1122,7 @@ def completion( logging_obj=logging_obj, ) - model_response["finish_reason"] = map_finish_reason( + model_response.choices[0].finish_reason = map_finish_reason( response_body["stop_reason"] ) _usage = litellm.Usage( @@ -1134,14 +1134,16 @@ def completion( setattr(model_response, "usage", _usage) else: outputText = response_body["completion"] - model_response["finish_reason"] = response_body["stop_reason"] + model_response.choices[0].finish_reason = response_body["stop_reason"] elif provider == "cohere": outputText = response_body["generations"][0]["text"] elif provider == "meta": outputText = response_body["generation"] elif provider == "mistral": outputText = response_body["outputs"][0]["text"] - model_response["finish_reason"] = response_body["outputs"][0]["stop_reason"] + model_response.choices[0].finish_reason = response_body["outputs"][0][ + "stop_reason" + ] else: # amazon titan outputText = response_body.get("results")[0].get("outputText") @@ -1160,7 +1162,7 @@ def completion( and getattr(model_response.choices[0].message, "tool_calls", None) is None ): - model_response["choices"][0]["message"]["content"] = outputText + model_response.choices[0].message.content = outputText elif ( hasattr(model_response.choices[0], "message") and getattr(model_response.choices[0].message, "tool_calls", None) @@ -1199,8 +1201,8 @@ def completion( ) setattr(model_response, "usage", usage) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model model_response._hidden_params["region_name"] = client.meta.region_name print_verbose(f"model_response._hidden_params: {model_response._hidden_params}") @@ -1323,9 +1325,9 @@ def _embedding_func_single( def embedding( model: str, input: Union[list, str], + model_response: litellm.EmbeddingResponse, api_key: Optional[str] = None, logging_obj=None, - model_response=None, optional_params=None, encoding=None, ): @@ -1391,9 +1393,9 @@ def embedding( "embedding": embedding, } ) - model_response["object"] = "list" - model_response["data"] = embedding_response - model_response["model"] = model + model_response.object = "list" + model_response.data = embedding_response + model_response.model = model input_tokens = 0 input_str = "".join(input) diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 4e737d1d2..f2700495f 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -521,7 +521,7 @@ class BedrockLLM(BaseLLM): outputText = completion_response["text"] # type: ignore elif "generations" in completion_response: outputText = completion_response["generations"][0]["text"] - model_response["finish_reason"] = map_finish_reason( + model_response.choices[0].finish_reason = map_finish_reason( completion_response["generations"][0]["finish_reason"] ) elif provider == "anthropic": @@ -625,7 +625,7 @@ class BedrockLLM(BaseLLM): logging_obj=logging_obj, ) - 
model_response["finish_reason"] = map_finish_reason( + model_response.choices[0].finish_reason = map_finish_reason( completion_response.get("stop_reason", "") ) _usage = litellm.Usage( @@ -638,7 +638,9 @@ class BedrockLLM(BaseLLM): else: outputText = completion_response["completion"] - model_response["finish_reason"] = completion_response["stop_reason"] + model_response.choices[0].finish_reason = completion_response[ + "stop_reason" + ] elif provider == "ai21": outputText = ( completion_response.get("completions")[0].get("data").get("text") @@ -647,9 +649,9 @@ class BedrockLLM(BaseLLM): outputText = completion_response["generation"] elif provider == "mistral": outputText = completion_response["outputs"][0]["text"] - model_response["finish_reason"] = completion_response["outputs"][0][ - "stop_reason" - ] + model_response.choices[0].finish_reason = completion_response[ + "outputs" + ][0]["stop_reason"] else: # amazon titan outputText = completion_response.get("results")[0].get("outputText") except Exception as e: @@ -667,7 +669,7 @@ class BedrockLLM(BaseLLM): and getattr(model_response.choices[0].message, "tool_calls", None) is None ): - model_response["choices"][0]["message"]["content"] = outputText + model_response.choices[0].message.content = outputText elif ( hasattr(model_response.choices[0], "message") and getattr(model_response.choices[0].message, "tool_calls", None) @@ -723,8 +725,8 @@ class BedrockLLM(BaseLLM): ) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -1066,7 +1068,7 @@ class BedrockLLM(BaseLLM): if response.status_code != 200: raise BedrockError( - status_code=response.status_code, message=response.text + status_code=response.status_code, message=response.read() ) decoder = AWSEventStreamDecoder(model=model) @@ -1446,8 +1448,8 @@ class BedrockConverseLLM(BaseLLM): message=litellm.Message(**chat_completion_message), ) ] - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=input_tokens, completion_tokens=output_tokens, diff --git a/litellm/llms/clarifai.py b/litellm/llms/clarifai.py index 785a7ad38..613ee5ced 100644 --- a/litellm/llms/clarifai.py +++ b/litellm/llms/clarifai.py @@ -1,13 +1,18 @@ -import os, types, traceback import json -import requests +import os import time +import traceback +import types from typing import Callable, Optional -from litellm.utils import ModelResponse, Usage, Choices, Message, CustomStreamWrapper -import litellm + import httpx +import requests + +import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler -from .prompt_templates.factory import prompt_factory, custom_prompt +from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage + +from .prompt_templates.factory import custom_prompt, prompt_factory class ClarifaiError(Exception): @@ -87,7 +92,14 @@ def completions_to_model(payload): def process_response( - model, prompt, response, model_response, api_key, data, encoding, logging_obj + model, + prompt, + response, + model_response: litellm.ModelResponse, + api_key, + data, + encoding, + logging_obj, ): logging_obj.post_call( input=prompt, @@ -116,7 +128,7 @@ def process_response( message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"] = 
choices_list + model_response.choices = choices_list # type: ignore except Exception as e: raise ClarifaiError( @@ -128,11 +140,15 @@ def process_response( completion_tokens = len( encoding.encode(model_response["choices"][0]["message"].get("content")) ) - model_response["model"] = model - model_response["usage"] = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + model_response.model = model + setattr( + model_response, + "usage", + Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response @@ -202,7 +218,7 @@ async def async_completion( message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"] = choices_list + model_response.choices = choices_list # type: ignore except Exception as e: raise ClarifaiError( @@ -214,11 +230,15 @@ async def async_completion( completion_tokens = len( encoding.encode(model_response["choices"][0]["message"].get("content")) ) - model_response["model"] = model - model_response["usage"] = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + model_response.model = model + setattr( + model_response, + "usage", + Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response diff --git a/litellm/llms/cloudflare.py b/litellm/llms/cloudflare.py index 5a24b3b44..516b490f4 100644 --- a/litellm/llms/cloudflare.py +++ b/litellm/llms/cloudflare.py @@ -1,13 +1,17 @@ -import os, types import json -from enum import Enum -import requests # type: ignore +import os import time +import types +from enum import Enum from typing import Callable, Optional -import litellm + import httpx # type: ignore +import requests # type: ignore + +import litellm from litellm.utils import ModelResponse, Usage -from .prompt_templates.factory import prompt_factory, custom_prompt + +from .prompt_templates.factory import custom_prompt, prompt_factory class CloudflareError(Exception): @@ -147,9 +151,9 @@ def completion( ) completion_response = response.json() - model_response["choices"][0]["message"]["content"] = completion_response[ - "result" - ]["response"] + model_response.choices[0].message.content = completion_response["result"][ # type: ignore + "response" + ] ## CALCULATING USAGE print_verbose( @@ -160,8 +164,8 @@ def completion( encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) - model_response["created"] = int(time.time()) - model_response["model"] = "cloudflare/" + model + model_response.created = int(time.time()) + model_response.model = "cloudflare/" + model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py index 14a66b54a..d946a8dde 100644 --- a/litellm/llms/cohere.py +++ b/litellm/llms/cohere.py @@ -1,12 +1,16 @@ -import os, types import json +import os +import time +import traceback +import types from enum import Enum -import requests # type: ignore -import time, traceback from typing import Callable, Optional -from litellm.utils import ModelResponse, Choices, Message, Usage -import litellm + import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.utils import Choices, Message, ModelResponse, Usage class CohereError(Exception): @@ -117,7 +121,7 @@ class CohereConfig: def 
validate_environment(api_key): headers = { - "Request-Source":"unspecified:litellm", + "Request-Source": "unspecified:litellm", "accept": "application/json", "content-type": "application/json", } @@ -219,7 +223,7 @@ def completion( message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"] = choices_list + model_response.choices = choices_list # type: ignore except Exception as e: raise CohereError( message=response.text, status_code=response.status_code @@ -231,8 +235,8 @@ def completion( encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -245,9 +249,9 @@ def completion( def embedding( model: str, input: list, + model_response: litellm.EmbeddingResponse, api_key: Optional[str] = None, logging_obj=None, - model_response=None, encoding=None, optional_params=None, ): @@ -294,14 +298,18 @@ def embedding( output_data.append( {"object": "embedding", "index": idx, "embedding": embedding} ) - model_response["object"] = "list" - model_response["data"] = output_data - model_response["model"] = model + model_response.object = "list" + model_response.data = output_data + model_response.model = model input_tokens = 0 for text in input: input_tokens += len(encoding.encode(text)) - model_response["usage"] = Usage( - prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + setattr( + model_response, + "usage", + Usage( + prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + ), ) return model_response diff --git a/litellm/llms/cohere_chat.py b/litellm/llms/cohere_chat.py index 1b3aa8405..fe2092946 100644 --- a/litellm/llms/cohere_chat.py +++ b/litellm/llms/cohere_chat.py @@ -305,8 +305,8 @@ def completion( prompt_tokens = billed_units.get("input_tokens", 0) completion_tokens = billed_units.get("output_tokens", 0) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 1ab09246b..88fa58abe 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -1,26 +1,26 @@ # What is this? 
## Handler file for databricks API https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#chat-request -from functools import partial -import os, types +import copy import json -from enum import Enum -import requests, copy # type: ignore +import os import time -from typing import Callable, Optional, List, Union, Tuple, Literal -from litellm.utils import ( - ModelResponse, - Usage, - CustomStreamWrapper, - EmbeddingResponse, -) -from litellm.litellm_core_utils.core_helpers import map_finish_reason -import litellm -from .prompt_templates.factory import prompt_factory, custom_prompt -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from .base import BaseLLM +import types +from enum import Enum +from functools import partial +from typing import Callable, List, Literal, Optional, Tuple, Union + import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.types.llms.databricks import GenericStreamingChunk from litellm.types.utils import ProviderField +from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage + +from .base import BaseLLM +from .prompt_templates.factory import custom_prompt, prompt_factory class DatabricksError(Exception): @@ -354,8 +354,8 @@ class DatabricksChatCompletion(BaseLLM): completion_tokens = completion_response["usage"]["output_tokens"] total_tokens = prompt_tokens + completion_tokens - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/gemini.py b/litellm/llms/gemini.py index f48c4e29e..3ce63e93f 100644 --- a/litellm/llms/gemini.py +++ b/litellm/llms/gemini.py @@ -1,7 +1,7 @@ -#################################### -######### DEPRECATED FILE ########## -#################################### -# logic moved to `vertex_httpx.py` # +# #################################### +# ######### DEPRECATED FILE ########## +# #################################### +# # logic moved to `vertex_httpx.py` # import copy import time @@ -92,332 +92,332 @@ class GeminiConfig: } -class TextStreamer: - """ - A class designed to return an async stream from AsyncGenerateContentResponse object. - """ +# class TextStreamer: +# """ +# A class designed to return an async stream from AsyncGenerateContentResponse object. +# """ - def __init__(self, response): - self.response = response - self._aiter = self.response.__aiter__() +# def __init__(self, response): +# self.response = response +# self._aiter = self.response.__aiter__() - async def __aiter__(self): - while True: - try: - # This will manually advance the async iterator. - # In the case the next object doesn't exists, __anext__() will simply raise a StopAsyncIteration exception - next_object = await self._aiter.__anext__() - yield next_object - except StopAsyncIteration: - # After getting all items from the async iterator, stop iterating - break +# async def __aiter__(self): +# while True: +# try: +# # This will manually advance the async iterator. 
+# # In the case the next object doesn't exists, __anext__() will simply raise a StopAsyncIteration exception +# next_object = await self._aiter.__anext__() +# yield next_object +# except StopAsyncIteration: +# # After getting all items from the async iterator, stop iterating +# break -def supports_system_instruction(): - import google.generativeai as genai +# def supports_system_instruction(): +# import google.generativeai as genai - gemini_pkg_version = Version(genai.__version__) - return gemini_pkg_version >= Version("0.5.0") +# gemini_pkg_version = Version(genai.__version__) +# return gemini_pkg_version >= Version("0.5.0") -def completion( - model: str, - messages: list, - model_response: ModelResponse, - print_verbose: Callable, - api_key, - encoding, - logging_obj, - custom_prompt_dict: dict, - acompletion: bool = False, - optional_params=None, - litellm_params=None, - logger_fn=None, -): - try: - import google.generativeai as genai # type: ignore - except: - raise Exception( - "Importing google.generativeai failed, please run 'pip install -q google-generativeai" - ) - genai.configure(api_key=api_key) - system_prompt = "" - if model in custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details["roles"], - initial_prompt_value=model_prompt_details["initial_prompt_value"], - final_prompt_value=model_prompt_details["final_prompt_value"], - messages=messages, - ) - else: - system_prompt, messages = get_system_prompt(messages=messages) - prompt = prompt_factory( - model=model, messages=messages, custom_llm_provider="gemini" - ) +# def completion( +# model: str, +# messages: list, +# model_response: ModelResponse, +# print_verbose: Callable, +# api_key, +# encoding, +# logging_obj, +# custom_prompt_dict: dict, +# acompletion: bool = False, +# optional_params=None, +# litellm_params=None, +# logger_fn=None, +# ): +# try: +# import google.generativeai as genai # type: ignore +# except: +# raise Exception( +# "Importing google.generativeai failed, please run 'pip install -q google-generativeai" +# ) +# genai.configure(api_key=api_key) +# system_prompt = "" +# if model in custom_prompt_dict: +# # check if the model has a registered custom prompt +# model_prompt_details = custom_prompt_dict[model] +# prompt = custom_prompt( +# role_dict=model_prompt_details["roles"], +# initial_prompt_value=model_prompt_details["initial_prompt_value"], +# final_prompt_value=model_prompt_details["final_prompt_value"], +# messages=messages, +# ) +# else: +# system_prompt, messages = get_system_prompt(messages=messages) +# prompt = prompt_factory( +# model=model, messages=messages, custom_llm_provider="gemini" +# ) - ## Load Config - inference_params = copy.deepcopy(optional_params) - stream = inference_params.pop("stream", None) +# ## Load Config +# inference_params = copy.deepcopy(optional_params) +# stream = inference_params.pop("stream", None) - # Handle safety settings - safety_settings_param = inference_params.pop("safety_settings", None) - safety_settings = None - if safety_settings_param: - safety_settings = [ - genai.types.SafetySettingDict(x) for x in safety_settings_param - ] +# # Handle safety settings +# safety_settings_param = inference_params.pop("safety_settings", None) +# safety_settings = None +# if safety_settings_param: +# safety_settings = [ +# genai.types.SafetySettingDict(x) for x in safety_settings_param +# ] - config = litellm.GeminiConfig.get_config() - for k, v in 
config.items(): - if ( - k not in inference_params - ): # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in - inference_params[k] = v +# config = litellm.GeminiConfig.get_config() +# for k, v in config.items(): +# if ( +# k not in inference_params +# ): # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in +# inference_params[k] = v - ## LOGGING - logging_obj.pre_call( - input=prompt, - api_key="", - additional_args={ - "complete_input_dict": { - "inference_params": inference_params, - "system_prompt": system_prompt, - } - }, - ) - ## COMPLETION CALL - try: - _params = {"model_name": "models/{}".format(model)} - _system_instruction = supports_system_instruction() - if _system_instruction and len(system_prompt) > 0: - _params["system_instruction"] = system_prompt - _model = genai.GenerativeModel(**_params) - if stream is True: - if acompletion is True: +# ## LOGGING +# logging_obj.pre_call( +# input=prompt, +# api_key="", +# additional_args={ +# "complete_input_dict": { +# "inference_params": inference_params, +# "system_prompt": system_prompt, +# } +# }, +# ) +# ## COMPLETION CALL +# try: +# _params = {"model_name": "models/{}".format(model)} +# _system_instruction = supports_system_instruction() +# if _system_instruction and len(system_prompt) > 0: +# _params["system_instruction"] = system_prompt +# _model = genai.GenerativeModel(**_params) +# if stream is True: +# if acompletion is True: - async def async_streaming(): - try: - response = await _model.generate_content_async( - contents=prompt, - generation_config=genai.types.GenerationConfig( - **inference_params - ), - safety_settings=safety_settings, - stream=True, - ) +# async def async_streaming(): +# try: +# response = await _model.generate_content_async( +# contents=prompt, +# generation_config=genai.types.GenerationConfig( +# **inference_params +# ), +# safety_settings=safety_settings, +# stream=True, +# ) - response = litellm.CustomStreamWrapper( - TextStreamer(response), - model, - custom_llm_provider="gemini", - logging_obj=logging_obj, - ) - return response - except Exception as e: - raise GeminiError(status_code=500, message=str(e)) +# response = litellm.CustomStreamWrapper( +# TextStreamer(response), +# model, +# custom_llm_provider="gemini", +# logging_obj=logging_obj, +# ) +# return response +# except Exception as e: +# raise GeminiError(status_code=500, message=str(e)) - return async_streaming() - response = _model.generate_content( - contents=prompt, - generation_config=genai.types.GenerationConfig(**inference_params), - safety_settings=safety_settings, - stream=True, - ) - return response - elif acompletion == True: - return async_completion( - _model=_model, - model=model, - prompt=prompt, - inference_params=inference_params, - safety_settings=safety_settings, - logging_obj=logging_obj, - print_verbose=print_verbose, - model_response=model_response, - messages=messages, - encoding=encoding, - ) - else: - params = { - "contents": prompt, - "generation_config": genai.types.GenerationConfig(**inference_params), - "safety_settings": safety_settings, - } - response = _model.generate_content(**params) - except Exception as e: - raise GeminiError( - message=str(e), - status_code=500, - ) +# return async_streaming() +# response = _model.generate_content( +# contents=prompt, +# generation_config=genai.types.GenerationConfig(**inference_params), +# safety_settings=safety_settings, +# stream=True, +# ) +# return response +# elif acompletion == 
True: +# return async_completion( +# _model=_model, +# model=model, +# prompt=prompt, +# inference_params=inference_params, +# safety_settings=safety_settings, +# logging_obj=logging_obj, +# print_verbose=print_verbose, +# model_response=model_response, +# messages=messages, +# encoding=encoding, +# ) +# else: +# params = { +# "contents": prompt, +# "generation_config": genai.types.GenerationConfig(**inference_params), +# "safety_settings": safety_settings, +# } +# response = _model.generate_content(**params) +# except Exception as e: +# raise GeminiError( +# message=str(e), +# status_code=500, +# ) - ## LOGGING - logging_obj.post_call( - input=prompt, - api_key="", - original_response=response, - additional_args={"complete_input_dict": {}}, - ) - print_verbose(f"raw model_response: {response}") - ## RESPONSE OBJECT - completion_response = response - try: - choices_list = [] - for idx, item in enumerate(completion_response.candidates): - if len(item.content.parts) > 0: - message_obj = Message(content=item.content.parts[0].text) - else: - message_obj = Message(content=None) - choice_obj = Choices(index=idx, message=message_obj) - choices_list.append(choice_obj) - model_response["choices"] = choices_list - except Exception as e: - verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) - verbose_logger.debug(traceback.format_exc()) - raise GeminiError( - message=traceback.format_exc(), status_code=response.status_code - ) +# ## LOGGING +# logging_obj.post_call( +# input=prompt, +# api_key="", +# original_response=response, +# additional_args={"complete_input_dict": {}}, +# ) +# print_verbose(f"raw model_response: {response}") +# ## RESPONSE OBJECT +# completion_response = response +# try: +# choices_list = [] +# for idx, item in enumerate(completion_response.candidates): +# if len(item.content.parts) > 0: +# message_obj = Message(content=item.content.parts[0].text) +# else: +# message_obj = Message(content=None) +# choice_obj = Choices(index=idx, message=message_obj) +# choices_list.append(choice_obj) +# model_response.choices = choices_list +# except Exception as e: +# verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) +# verbose_logger.debug(traceback.format_exc()) +# raise GeminiError( +# message=traceback.format_exc(), status_code=response.status_code +# ) - try: - completion_response = model_response["choices"][0]["message"].get("content") - if completion_response is None: - raise Exception - except: - original_response = f"response: {response}" - if hasattr(response, "candidates"): - original_response = f"response: {response.candidates}" - if "SAFETY" in original_response: - original_response += ( - "\nThe candidate content was flagged for safety reasons." - ) - elif "RECITATION" in original_response: - original_response += ( - "\nThe candidate content was flagged for recitation reasons." - ) - raise GeminiError( - status_code=400, - message=f"No response received. Original response - {original_response}", - ) +# try: +# completion_response = model_response["choices"][0]["message"].get("content") +# if completion_response is None: +# raise Exception +# except: +# original_response = f"response: {response}" +# if hasattr(response, "candidates"): +# original_response = f"response: {response.candidates}" +# if "SAFETY" in original_response: +# original_response += ( +# "\nThe candidate content was flagged for safety reasons." 
+# ) +# elif "RECITATION" in original_response: +# original_response += ( +# "\nThe candidate content was flagged for recitation reasons." +# ) +# raise GeminiError( +# status_code=400, +# message=f"No response received. Original response - {original_response}", +# ) - ## CALCULATING USAGE - prompt_str = "" - for m in messages: - if isinstance(m["content"], str): - prompt_str += m["content"] - elif isinstance(m["content"], list): - for content in m["content"]: - if content["type"] == "text": - prompt_str += content["text"] - prompt_tokens = len(encoding.encode(prompt_str)) - completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"].get("content", "")) - ) +# ## CALCULATING USAGE +# prompt_str = "" +# for m in messages: +# if isinstance(m["content"], str): +# prompt_str += m["content"] +# elif isinstance(m["content"], list): +# for content in m["content"]: +# if content["type"] == "text": +# prompt_str += content["text"] +# prompt_tokens = len(encoding.encode(prompt_str)) +# completion_tokens = len( +# encoding.encode(model_response["choices"][0]["message"].get("content", "")) +# ) - model_response["created"] = int(time.time()) - model_response["model"] = "gemini/" + model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, - ) - setattr(model_response, "usage", usage) - return model_response +# model_response.created = int(time.time()) +# model_response.model = "gemini/" + model +# usage = Usage( +# prompt_tokens=prompt_tokens, +# completion_tokens=completion_tokens, +# total_tokens=prompt_tokens + completion_tokens, +# ) +# setattr(model_response, "usage", usage) +# return model_response -async def async_completion( - _model, - model, - prompt, - inference_params, - safety_settings, - logging_obj, - print_verbose, - model_response, - messages, - encoding, -): - import google.generativeai as genai # type: ignore +# async def async_completion( +# _model, +# model, +# prompt, +# inference_params, +# safety_settings, +# logging_obj, +# print_verbose, +# model_response, +# messages, +# encoding, +# ): +# import google.generativeai as genai # type: ignore - response = await _model.generate_content_async( - contents=prompt, - generation_config=genai.types.GenerationConfig(**inference_params), - safety_settings=safety_settings, - ) +# response = await _model.generate_content_async( +# contents=prompt, +# generation_config=genai.types.GenerationConfig(**inference_params), +# safety_settings=safety_settings, +# ) - ## LOGGING - logging_obj.post_call( - input=prompt, - api_key="", - original_response=response, - additional_args={"complete_input_dict": {}}, - ) - print_verbose(f"raw model_response: {response}") - ## RESPONSE OBJECT - completion_response = response - try: - choices_list = [] - for idx, item in enumerate(completion_response.candidates): - if len(item.content.parts) > 0: - message_obj = Message(content=item.content.parts[0].text) - else: - message_obj = Message(content=None) - choice_obj = Choices(index=idx, message=message_obj) - choices_list.append(choice_obj) - model_response["choices"] = choices_list - except Exception as e: - verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) - verbose_logger.debug(traceback.format_exc()) - raise GeminiError( - message=traceback.format_exc(), status_code=response.status_code - ) +# ## LOGGING +# logging_obj.post_call( +# input=prompt, +# api_key="", +# original_response=response, +# 
additional_args={"complete_input_dict": {}}, +# ) +# print_verbose(f"raw model_response: {response}") +# ## RESPONSE OBJECT +# completion_response = response +# try: +# choices_list = [] +# for idx, item in enumerate(completion_response.candidates): +# if len(item.content.parts) > 0: +# message_obj = Message(content=item.content.parts[0].text) +# else: +# message_obj = Message(content=None) +# choice_obj = Choices(index=idx, message=message_obj) +# choices_list.append(choice_obj) +# model_response["choices"] = choices_list +# except Exception as e: +# verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) +# verbose_logger.debug(traceback.format_exc()) +# raise GeminiError( +# message=traceback.format_exc(), status_code=response.status_code +# ) - try: - completion_response = model_response["choices"][0]["message"].get("content") - if completion_response is None: - raise Exception - except: - original_response = f"response: {response}" - if hasattr(response, "candidates"): - original_response = f"response: {response.candidates}" - if "SAFETY" in original_response: - original_response += ( - "\nThe candidate content was flagged for safety reasons." - ) - elif "RECITATION" in original_response: - original_response += ( - "\nThe candidate content was flagged for recitation reasons." - ) - raise GeminiError( - status_code=400, - message=f"No response received. Original response - {original_response}", - ) +# try: +# completion_response = model_response["choices"][0]["message"].get("content") +# if completion_response is None: +# raise Exception +# except: +# original_response = f"response: {response}" +# if hasattr(response, "candidates"): +# original_response = f"response: {response.candidates}" +# if "SAFETY" in original_response: +# original_response += ( +# "\nThe candidate content was flagged for safety reasons." +# ) +# elif "RECITATION" in original_response: +# original_response += ( +# "\nThe candidate content was flagged for recitation reasons." +# ) +# raise GeminiError( +# status_code=400, +# message=f"No response received. 
Original response - {original_response}", +# ) - ## CALCULATING USAGE - prompt_str = "" - for m in messages: - if isinstance(m["content"], str): - prompt_str += m["content"] - elif isinstance(m["content"], list): - for content in m["content"]: - if content["type"] == "text": - prompt_str += content["text"] - prompt_tokens = len(encoding.encode(prompt_str)) - completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"].get("content", "")) - ) +# ## CALCULATING USAGE +# prompt_str = "" +# for m in messages: +# if isinstance(m["content"], str): +# prompt_str += m["content"] +# elif isinstance(m["content"], list): +# for content in m["content"]: +# if content["type"] == "text": +# prompt_str += content["text"] +# prompt_tokens = len(encoding.encode(prompt_str)) +# completion_tokens = len( +# encoding.encode(model_response["choices"][0]["message"].get("content", "")) +# ) - model_response["created"] = int(time.time()) - model_response["model"] = "gemini/" + model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, - ) - model_response.usage = usage - return model_response +# model_response["created"] = int(time.time()) +# model_response["model"] = "gemini/" + model +# usage = Usage( +# prompt_tokens=prompt_tokens, +# completion_tokens=completion_tokens, +# total_tokens=prompt_tokens + completion_tokens, +# ) +# model_response.usage = usage +# return model_response -def embedding(): - # logic for parsing in - calling - parsing out model embedding calls - pass +# def embedding(): +# # logic for parsing in - calling - parsing out model embedding calls +# pass diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py index c54dba75f..8b755e2bb 100644 --- a/litellm/llms/huggingface_restapi.py +++ b/litellm/llms/huggingface_restapi.py @@ -1,17 +1,22 @@ ## Uses the huggingface text generation inference API -import os, copy, types -import json -from enum import Enum -import httpx, requests -from .base import BaseLLM -import time -import litellm -from typing import Callable, Dict, List, Any, Literal, Tuple -from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage -from typing import Optional -from .prompt_templates.factory import prompt_factory, custom_prompt -from litellm.types.completion import ChatCompletionMessageToolCallParam +import copy import enum +import json +import os +import time +import types +from enum import Enum +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple + +import httpx +import requests + +import litellm +from litellm.types.completion import ChatCompletionMessageToolCallParam +from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage + +from .base import BaseLLM +from .prompt_templates.factory import custom_prompt, prompt_factory class HuggingfaceError(Exception): @@ -269,7 +274,7 @@ class Huggingface(BaseLLM): def convert_to_model_response_object( self, completion_response, - model_response, + model_response: litellm.ModelResponse, task: hf_tasks, optional_params, encoding, @@ -278,11 +283,9 @@ class Huggingface(BaseLLM): ): if task == "conversational": if len(completion_response["generated_text"]) > 0: # type: ignore - model_response["choices"][0]["message"][ - "content" - ] = completion_response[ + model_response.choices[0].message.content = completion_response[ # type: ignore "generated_text" - ] # type: ignore + ] elif task == "text-generation-inference": if ( not 
isinstance(completion_response, list) @@ -295,7 +298,7 @@ class Huggingface(BaseLLM): ) if len(completion_response[0]["generated_text"]) > 0: - model_response["choices"][0]["message"]["content"] = output_parser( + model_response.choices[0].message.content = output_parser( # type: ignore completion_response[0]["generated_text"] ) ## GETTING LOGPROBS + FINISH REASON @@ -310,7 +313,7 @@ class Huggingface(BaseLLM): for token in completion_response[0]["details"]["tokens"]: if token["logprob"] != None: sum_logprob += token["logprob"] - model_response["choices"][0]["message"]._logprob = sum_logprob + setattr(model_response.choices[0].message, "_logprob", sum_logprob) # type: ignore if "best_of" in optional_params and optional_params["best_of"] > 1: if ( "details" in completion_response[0] @@ -337,14 +340,14 @@ class Huggingface(BaseLLM): message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"].extend(choices_list) + model_response.choices.extend(choices_list) elif task == "text-classification": - model_response["choices"][0]["message"]["content"] = json.dumps( + model_response.choices[0].message.content = json.dumps( # type: ignore completion_response ) else: if len(completion_response[0]["generated_text"]) > 0: - model_response["choices"][0]["message"]["content"] = output_parser( + model_response.choices[0].message.content = output_parser( # type: ignore completion_response[0]["generated_text"] ) ## CALCULATING USAGE @@ -371,14 +374,14 @@ class Huggingface(BaseLLM): else: completion_tokens = 0 - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=prompt_tokens + completion_tokens, ) - model_response.usage = usage + setattr(model_response, "usage", usage) model_response._hidden_params["original_response"] = completion_response return model_response @@ -763,10 +766,10 @@ class Huggingface(BaseLLM): self, model: str, input: list, + model_response: litellm.EmbeddingResponse, api_key: Optional[str] = None, api_base: Optional[str] = None, logging_obj=None, - model_response=None, encoding=None, ): super().embedding() @@ -867,15 +870,21 @@ class Huggingface(BaseLLM): ], # flatten list returned from hf } ) - model_response["object"] = "list" - model_response["data"] = output_data - model_response["model"] = model + model_response.object = "list" + model_response.data = output_data + model_response.model = model input_tokens = 0 for text in input: input_tokens += len(encoding.encode(text)) - model_response["usage"] = { - "prompt_tokens": input_tokens, - "total_tokens": input_tokens, - } + setattr( + model_response, + "usage", + litellm.Usage( + **{ + "prompt_tokens": input_tokens, + "total_tokens": input_tokens, + } + ), + ) return model_response diff --git a/litellm/llms/maritalk.py b/litellm/llms/maritalk.py index dfe53e9df..c2eb66382 100644 --- a/litellm/llms/maritalk.py +++ b/litellm/llms/maritalk.py @@ -1,11 +1,15 @@ -import os, types import json +import os +import time +import traceback +import types from enum import Enum +from typing import Callable, List, Optional + import requests # type: ignore -import time, traceback -from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Choices, Message, Usage + import litellm +from litellm.utils import Choices, Message, ModelResponse, Usage class MaritalkError(Exception): @@ -152,9 +156,9 @@ def completion( 
else: try: if len(completion_response["answer"]) > 0: - model_response["choices"][0]["message"]["content"] = ( - completion_response["answer"] - ) + model_response.choices[0].message.content = completion_response[ # type: ignore + "answer" + ] except Exception as e: raise MaritalkError( message=response.text, status_code=response.status_code @@ -167,8 +171,8 @@ def completion( encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/nlp_cloud.py b/litellm/llms/nlp_cloud.py index cd5f17a90..84908f26b 100644 --- a/litellm/llms/nlp_cloud.py +++ b/litellm/llms/nlp_cloud.py @@ -1,9 +1,12 @@ -import os, types import json -from enum import Enum -import requests # type: ignore +import os import time +import types +from enum import Enum from typing import Callable, Optional + +import requests # type: ignore + import litellm from litellm.utils import ModelResponse, Usage @@ -185,7 +188,7 @@ def completion( else: try: if len(completion_response["generated_text"]) > 0: - model_response["choices"][0]["message"]["content"] = ( + model_response.choices[0].message.content = ( # type: ignore completion_response["generated_text"] ) except: @@ -198,8 +201,8 @@ def completion( prompt_tokens = completion_response["nb_input_tokens"] completion_tokens = completion_response["nb_generated_tokens"] - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index 1939715b3..cae47fded 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -1,13 +1,21 @@ -from itertools import chain -import requests, types, time # type: ignore -import json, uuid +import asyncio +import json +import time import traceback -from typing import Optional, List +import types +import uuid +from itertools import chain +from typing import List, Optional + +import aiohttp +import httpx # type: ignore +import requests # type: ignore + import litellm -from litellm.types.utils import ProviderField -import httpx, aiohttp, asyncio # type: ignore -from .prompt_templates.factory import prompt_factory, custom_prompt from litellm import verbose_logger +from litellm.types.utils import ProviderField + +from .prompt_templates.factory import custom_prompt, prompt_factory class OllamaError(Exception): @@ -138,7 +146,6 @@ class OllamaConfig: ) ] - def get_supported_openai_params( self, ): @@ -157,7 +164,8 @@ class OllamaConfig: # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI # and convert to jpeg if necessary. 
def _convert_image(image): - import base64, io + import base64 + import io try: from PIL import Image @@ -183,13 +191,13 @@ def _convert_image(image): # ollama implementation def get_ollama_response( + model_response: litellm.ModelResponse, api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?", optional_params=None, logging_obj=None, acompletion: bool = False, - model_response=None, encoding=None, ): if api_base.endswith("/api/generate"): @@ -271,7 +279,7 @@ def get_ollama_response( response_json = response.json() ## RESPONSE OBJECT - model_response["choices"][0]["finish_reason"] = "stop" + model_response.choices[0].finish_reason = "stop" if data.get("format", "") == "json": function_call = json.loads(response_json["response"]) message = litellm.Message( @@ -287,20 +295,24 @@ def get_ollama_response( } ], ) - model_response["choices"][0]["message"] = message - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].message = message # type: ignore + model_response.choices[0].finish_reason = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json["response"] - model_response["created"] = int(time.time()) - model_response["model"] = "ollama/" + model + model_response.choices[0].message.content = response_json["response"] # type: ignore + model_response.created = int(time.time()) + model_response.model = "ollama/" + model prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=()))) # type: ignore completion_tokens = response_json.get( "eval_count", len(response_json.get("message", dict()).get("content", "")) ) - model_response["usage"] = litellm.Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response @@ -346,8 +358,8 @@ def ollama_completion_stream(url, data, logging_obj): ], ) model_response = first_chunk - model_response["choices"][0]["delta"] = delta - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].delta = delta # type: ignore + model_response.choices[0].finish_reason = "tool_calls" yield model_response else: for transformed_chunk in streamwrapper: @@ -401,8 +413,8 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob ], ) model_response = first_chunk - model_response["choices"][0]["delta"] = delta - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].delta = delta # type: ignore + model_response.choices[0].finish_reason = "tool_calls" yield model_response else: async for transformed_chunk in streamwrapper: @@ -418,7 +430,9 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob raise e -async def ollama_acompletion(url, data, model_response, encoding, logging_obj): +async def ollama_acompletion( + url, data, model_response: litellm.ModelResponse, encoding, logging_obj +): data["stream"] = False try: timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes @@ -442,7 +456,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): response_json = await resp.json() ## RESPONSE OBJECT - model_response["choices"][0]["finish_reason"] = "stop" + model_response.choices[0].finish_reason = "stop" if 
data.get("format", "") == "json": function_call = json.loads(response_json["response"]) message = litellm.Message( @@ -451,30 +465,34 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): { "id": f"call_{str(uuid.uuid4())}", "function": { - "name": function_call.get("name", function_call.get("function", None)), + "name": function_call.get( + "name", function_call.get("function", None) + ), "arguments": json.dumps(function_call["arguments"]), }, "type": "function", } ], ) - model_response["choices"][0]["message"] = message - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].message = message # type: ignore + model_response.choices[0].finish_reason = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json[ - "response" - ] - model_response["created"] = int(time.time()) - model_response["model"] = "ollama/" + data["model"] + model_response.choices[0].message.content = response_json["response"] # type: ignore + model_response.created = int(time.time()) + model_response.model = "ollama/" + data["model"] prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=()))) # type: ignore completion_tokens = response_json.get( "eval_count", len(response_json.get("message", dict()).get("content", "")), ) - model_response["usage"] = litellm.Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response except Exception as e: @@ -491,9 +509,9 @@ async def ollama_aembeddings( api_base: str, model: str, prompts: list, + model_response: litellm.EmbeddingResponse, optional_params=None, logging_obj=None, - model_response=None, encoding=None, ): if api_base.endswith("/api/embeddings"): @@ -554,13 +572,19 @@ async def ollama_aembeddings( input_tokens = len(encoding.encode(prompt)) total_input_tokens += input_tokens - model_response["object"] = "list" - model_response["data"] = output_data - model_response["model"] = model - model_response["usage"] = { - "prompt_tokens": total_input_tokens, - "total_tokens": total_input_tokens, - } + model_response.object = "list" + model_response.data = output_data + model_response.model = model + setattr( + model_response, + "usage", + litellm.Usage( + **{ + "prompt_tokens": total_input_tokens, + "total_tokens": total_input_tokens, + } + ), + ) return model_response diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index bb053f5e8..ebd0f22fb 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -1,15 +1,17 @@ -from itertools import chain -import requests -import types -import time import json -import uuid +import time import traceback +import types +import uuid +from itertools import chain from typing import Optional -from litellm import verbose_logger -import litellm -import httpx + import aiohttp +import httpx +import requests + +import litellm +from litellm import verbose_logger class OllamaError(Exception): @@ -195,6 +197,7 @@ class OllamaChatConfig: # ollama implementation def get_ollama_response( + model_response: litellm.ModelResponse, api_base="http://localhost:11434", api_key: Optional[str] = None, model="llama2", @@ -202,7 +205,6 @@ def get_ollama_response( optional_params=None, logging_obj=None, acompletion: 
bool = False, - model_response=None, encoding=None, ): if api_base.endswith("/api/chat"): @@ -295,7 +297,7 @@ def get_ollama_response( response_json = response.json() ## RESPONSE OBJECT - model_response["choices"][0]["finish_reason"] = "stop" + model_response.choices[0].finish_reason = "stop" if data.get("format", "") == "json": function_call = json.loads(response_json["message"]["content"]) message = litellm.Message( @@ -311,22 +313,24 @@ def get_ollama_response( } ], ) - model_response["choices"][0]["message"] = message - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].message = message # type: ignore + model_response.choices[0].finish_reason = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json["message"][ - "content" - ] - model_response["created"] = int(time.time()) - model_response["model"] = "ollama/" + model + model_response.choices[0].message.content = response_json["message"]["content"] # type: ignore + model_response.created = int(time.time()) + model_response.model = "ollama/" + model prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore completion_tokens = response_json.get( "eval_count", litellm.token_counter(text=response_json["message"]["content"]) ) - model_response["usage"] = litellm.Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response @@ -379,8 +383,8 @@ def ollama_completion_stream(url, api_key, data, logging_obj): ], ) model_response = first_chunk - model_response["choices"][0]["delta"] = delta - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].delta = delta # type: ignore + model_response.choices[0].finish_reason = "tool_calls" yield model_response else: for transformed_chunk in streamwrapper: @@ -434,7 +438,9 @@ async def ollama_async_streaming( { "id": f"call_{str(uuid.uuid4())}", "function": { - "name": function_call.get("name", function_call.get("function", None)), + "name": function_call.get( + "name", function_call.get("function", None) + ), "arguments": json.dumps(function_call["arguments"]), }, "type": "function", @@ -442,8 +448,8 @@ async def ollama_async_streaming( ], ) model_response = first_chunk - model_response["choices"][0]["delta"] = delta - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].delta = delta # type: ignore + model_response.choices[0].finish_reason = "tool_calls" yield model_response else: async for transformed_chunk in streamwrapper: @@ -457,7 +463,7 @@ async def ollama_acompletion( url, api_key: Optional[str], data, - model_response, + model_response: litellm.ModelResponse, encoding, logging_obj, function_name, @@ -492,7 +498,7 @@ async def ollama_acompletion( ) ## RESPONSE OBJECT - model_response["choices"][0]["finish_reason"] = "stop" + model_response.choices[0].finish_reason = "stop" if data.get("format", "") == "json": function_call = json.loads(response_json["message"]["content"]) message = litellm.Message( @@ -510,15 +516,17 @@ async def ollama_acompletion( } ], ) - model_response["choices"][0]["message"] = message - model_response["choices"][0]["finish_reason"] = "tool_calls" + model_response.choices[0].message = message # type: ignore + 
model_response.choices[0].finish_reason = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json[ + model_response.choices[0].message.content = response_json[ # type: ignore "message" - ]["content"] + ][ + "content" + ] - model_response["created"] = int(time.time()) - model_response["model"] = "ollama_chat/" + data["model"] + model_response.created = int(time.time()) + model_response.model = "ollama_chat/" + data["model"] prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"])) # type: ignore completion_tokens = response_json.get( "eval_count", @@ -526,10 +534,14 @@ async def ollama_acompletion( text=response_json["message"]["content"], count_response_tokens=True ), ) - model_response["usage"] = litellm.Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + setattr( + model_response, + "usage", + litellm.Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), ) return model_response except Exception as e: diff --git a/litellm/llms/oobabooga.py b/litellm/llms/oobabooga.py index f8f32e0fe..79d918667 100644 --- a/litellm/llms/oobabooga.py +++ b/litellm/llms/oobabooga.py @@ -1,11 +1,14 @@ -import os import json -from enum import Enum -import requests # type: ignore +import os import time +from enum import Enum from typing import Callable, Optional -from litellm.utils import ModelResponse, Usage -from .prompt_templates.factory import prompt_factory, custom_prompt + +import requests # type: ignore + +from litellm.utils import EmbeddingResponse, ModelResponse, Usage + +from .prompt_templates.factory import custom_prompt, prompt_factory class OobaboogaError(Exception): @@ -99,17 +102,15 @@ def completion( ) else: try: - model_response["choices"][0]["message"]["content"] = ( - completion_response["choices"][0]["message"]["content"] - ) + model_response.choices[0].message.content = completion_response["choices"][0]["message"]["content"] # type: ignore except: raise OobaboogaError( message=json.dumps(completion_response), status_code=response.status_code, ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=completion_response["usage"]["prompt_tokens"], completion_tokens=completion_response["usage"]["completion_tokens"], @@ -122,10 +123,10 @@ def completion( def embedding( model: str, input: list, + model_response: EmbeddingResponse, api_key: Optional[str] = None, api_base: Optional[str] = None, logging_obj=None, - model_response=None, optional_params=None, encoding=None, ): @@ -166,7 +167,7 @@ def embedding( ) # Process response data - model_response["data"] = [ + model_response.data = [ { "embedding": completion_response["data"][0]["embedding"], "index": 0, @@ -176,8 +177,12 @@ def embedding( num_tokens = len(completion_response["data"][0]["embedding"]) # Adding metadata to response - model_response.usage = Usage(prompt_tokens=num_tokens, total_tokens=num_tokens) - model_response["object"] = "list" - model_response["model"] = model + setattr( + model_response, + "usage", + Usage(prompt_tokens=num_tokens, total_tokens=num_tokens), + ) + model_response.object = "list" + model_response.model = model return model_response diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index e6056691e..0a2b6a3fa 100644 --- a/litellm/llms/openai.py 
+++ b/litellm/llms/openai.py
@@ -18,6 +18,7 @@ import httpx
 import openai
 from openai import AsyncOpenAI, OpenAI
 from openai.types.beta.assistant_deleted import AssistantDeleted
+from openai.types.file_deleted import FileDeleted
 from pydantic import BaseModel
 from typing_extensions import overload, override
@@ -2064,6 +2065,151 @@ class OpenAIFilesAPI(BaseLLM):
         return response
+    async def aretrieve_file(
+        self,
+        file_id: str,
+        openai_client: AsyncOpenAI,
+    ) -> FileObject:
+        response = await openai_client.files.retrieve(file_id=file_id)
+        return response
+
+    def retrieve_file(
+        self,
+        _is_async: bool,
+        file_id: str,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ):
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.aretrieve_file(  # type: ignore
+                file_id=file_id,
+                openai_client=openai_client,
+            )
+        response = openai_client.files.retrieve(file_id=file_id)
+
+        return response
+
+    async def adelete_file(
+        self,
+        file_id: str,
+        openai_client: AsyncOpenAI,
+    ) -> FileDeleted:
+        response = await openai_client.files.delete(file_id=file_id)
+        return response
+
+    def delete_file(
+        self,
+        _is_async: bool,
+        file_id: str,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ):
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.adelete_file(  # type: ignore
+                file_id=file_id,
+                openai_client=openai_client,
+            )
+        response = openai_client.files.delete(file_id=file_id)
+
+        return response
+
+    async def alist_files(
+        self,
+        openai_client: AsyncOpenAI,
+        purpose: Optional[str] = None,
+    ):
+        if isinstance(purpose, str):
+            response = await openai_client.files.list(purpose=purpose)
+        else:
+            response = await openai_client.files.list()
+        return response
+
+    def list_files(
+        self,
+        _is_async: bool,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        purpose: Optional[str] = None,
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ):
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.alist_files(  # type: ignore
+                purpose=purpose,
+                openai_client=openai_client,
+            )
+
+        if isinstance(purpose, str):
+            response = openai_client.files.list(purpose=purpose)
+        else:
+            response = openai_client.files.list()
+
+        return response
+
 class OpenAIBatchesAPI(BaseLLM):
     """
diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py
index 4d9953e77..b750b800b 100644
--- a/litellm/llms/palm.py
+++ b/litellm/llms/palm.py
@@ -1,12 +1,14 @@
-import types
-import traceback
 import copy
 import time
+import traceback
+import types
 from typing import Callable, Optional
-from litellm.utils import ModelResponse, Choices, Message, Usage
-import litellm
+
 import httpx
+
+import litellm
 from litellm import verbose_logger
+from litellm.utils import Choices, Message, ModelResponse, Usage
 class PalmError(Exception):
@@ -164,7 +166,7 @@ def completion(
             message_obj = Message(content=None)
             choice_obj = Choices(index=idx + 1, message=message_obj)
             choices_list.append(choice_obj)
-        model_response["choices"] = choices_list
+        model_response.choices = choices_list  # type: ignore
     except Exception as e:
         verbose_logger.error(
             "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
         )
@@ -188,8 +190,8 @@ def completion(
         encoding.encode(model_response["choices"][0]["message"].get("content", ""))
     )
-    model_response["created"] = int(time.time())
-    model_response["model"] = "palm/" + model
+    model_response.created = int(time.time())
+    model_response.model = "palm/" + model
     usage = Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index 334b80d38..be9f92f07 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -1,12 +1,16 @@
-import os, types
 import json
-from enum import Enum
-import requests  # type: ignore
+import os
 import time
+import types
+from enum import Enum
 from typing import Callable, Optional
+
+import requests  # type: ignore
+
 import litellm
 from litellm.utils import ModelResponse, Usage
-from .prompt_templates.factory import prompt_factory, custom_prompt
+
+from .prompt_templates.factory import custom_prompt, prompt_factory
 class PetalsError(Exception):
@@ -151,8 +155,8 @@ def
completion( else: try: import torch - from transformers import AutoTokenizer from petals import AutoDistributedModelForCausalLM # type: ignore + from transformers import AutoTokenizer except: raise Exception( "Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals" @@ -189,15 +193,15 @@ def completion( output_text = tokenizer.decode(outputs[0]) if len(output_text) > 0: - model_response["choices"][0]["message"]["content"] = output_text + model_response.choices[0].message.content = output_text # type: ignore prompt_tokens = len(encoding.encode(prompt)) completion_tokens = len( encoding.encode(model_response["choices"][0]["message"].get("content")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py index 534f8e26f..d028cb107 100644 --- a/litellm/llms/predibase.py +++ b/litellm/llms/predibase.py @@ -279,7 +279,7 @@ class PredibaseChatCompletion(BaseLLM): message=f"'generated_text' is not a key response dictionary - {completion_response}", ) if len(completion_response["generated_text"]) > 0: - model_response["choices"][0]["message"]["content"] = self.output_parser( + model_response.choices[0].message.content = self.output_parser( # type: ignore completion_response["generated_text"] ) ## GETTING LOGPROBS + FINISH REASON @@ -294,10 +294,10 @@ class PredibaseChatCompletion(BaseLLM): for token in completion_response["details"]["tokens"]: if token["logprob"] is not None: sum_logprob += token["logprob"] - model_response["choices"][0][ - "message" - ]._logprob = ( - sum_logprob # [TODO] move this to using the actual logprobs + setattr( + model_response.choices[0].message, # type: ignore + "_logprob", + sum_logprob, # [TODO] move this to using the actual logprobs ) if "best_of" in optional_params and optional_params["best_of"] > 1: if ( @@ -325,7 +325,7 @@ class PredibaseChatCompletion(BaseLLM): message=message_obj, ) choices_list.append(choice_obj) - model_response["choices"].extend(choices_list) + model_response.choices.extend(choices_list) ## CALCULATING USAGE prompt_tokens = 0 @@ -351,8 +351,8 @@ class PredibaseChatCompletion(BaseLLM): total_tokens = prompt_tokens + completion_tokens - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py index 77dc52aae..1dd29fd7d 100644 --- a/litellm/llms/replicate.py +++ b/litellm/llms/replicate.py @@ -388,7 +388,7 @@ def process_response( ## Building RESPONSE OBJECT if len(result) > 1: - model_response["choices"][0]["message"]["content"] = result + model_response.choices[0].message.content = result # type: ignore # Calculate usage prompt_tokens = len(encoding.encode(prompt, disallowed_special=())) @@ -398,7 +398,7 @@ def process_response( disallowed_special=(), ) ) - model_response["model"] = "replicate/" + model + model_response.model = "replicate/" + model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -498,7 +498,7 @@ def completion( ## Step1: Start Prediction: gets a prediction url ## Step2: Poll prediction url for response ## Step2: is handled 
with and without streaming - model_response["created"] = int( + model_response.created = int( time.time() ) # for pricing this must remain right before calling api diff --git a/litellm/llms/sagemaker.py b/litellm/llms/sagemaker.py index 6892445f0..d16d2bd11 100644 --- a/litellm/llms/sagemaker.py +++ b/litellm/llms/sagemaker.py @@ -1,16 +1,21 @@ -import os, types, traceback -from enum import Enum -import json -import requests # type: ignore -import time -from typing import Callable, Optional, Any -import litellm -from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage -import sys -from copy import deepcopy -import httpx # type: ignore import io -from .prompt_templates.factory import prompt_factory, custom_prompt +import json +import os +import sys +import time +import traceback +import types +from copy import deepcopy +from enum import Enum +from typing import Any, Callable, Optional + +import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.utils import EmbeddingResponse, ModelResponse, Usage, get_secret + +from .prompt_templates.factory import custom_prompt, prompt_factory class SagemakerError(Exception): @@ -377,7 +382,7 @@ def completion( if completion_output.startswith(prompt) and "" in prompt: completion_output = completion_output.replace(prompt, "", 1) - model_response["choices"][0]["message"]["content"] = completion_output + model_response.choices[0].message.content = completion_output # type: ignore except: raise SagemakerError( message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}", @@ -390,8 +395,8 @@ def completion( encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -597,7 +602,7 @@ async def async_completion( if completion_output.startswith(data["inputs"]) and "" in data["inputs"]: completion_output = completion_output.replace(data["inputs"], "", 1) - model_response["choices"][0]["message"]["content"] = completion_output + model_response.choices[0].message.content = completion_output # type: ignore except: raise SagemakerError( message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}", @@ -610,8 +615,8 @@ async def async_completion( encoding.encode(model_response["choices"][0]["message"].get("content", "")) ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -741,16 +746,20 @@ def embedding( {"object": "embedding", "index": idx, "embedding": embedding} ) - model_response["object"] = "list" - model_response["data"] = output_data - model_response["model"] = model + model_response.object = "list" + model_response.data = output_data + model_response.model = model input_tokens = 0 for text in input: input_tokens += len(encoding.encode(text)) - model_response["usage"] = Usage( - prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + setattr( + model_response, + "usage", + Usage( + prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + ), ) return model_response diff --git a/litellm/llms/together_ai.py b/litellm/llms/together_ai.py index 
47453ca88..3adbcae37 100644 --- a/litellm/llms/together_ai.py +++ b/litellm/llms/together_ai.py @@ -3,16 +3,20 @@ Deprecated. We now do together ai calls via the openai client. Reference: https://docs.together.ai/docs/openai-api-compatibility """ -import os, types import json -from enum import Enum -import requests # type: ignore +import os import time +import types +from enum import Enum from typing import Callable, Optional -import litellm + import httpx # type: ignore +import requests # type: ignore + +import litellm from litellm.utils import ModelResponse, Usage -from .prompt_templates.factory import prompt_factory, custom_prompt + +from .prompt_templates.factory import custom_prompt, prompt_factory class TogetherAIError(Exception): @@ -91,145 +95,145 @@ class TogetherAIConfig: } -def validate_environment(api_key): - if api_key is None: - raise ValueError( - "Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params" - ) - headers = { - "accept": "application/json", - "content-type": "application/json", - "Authorization": "Bearer " + api_key, - } - return headers +# def validate_environment(api_key): +# if api_key is None: +# raise ValueError( +# "Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params" +# ) +# headers = { +# "accept": "application/json", +# "content-type": "application/json", +# "Authorization": "Bearer " + api_key, +# } +# return headers -def completion( - model: str, - messages: list, - api_base: str, - model_response: ModelResponse, - print_verbose: Callable, - encoding, - api_key, - logging_obj, - custom_prompt_dict={}, - optional_params=None, - litellm_params=None, - logger_fn=None, -): - headers = validate_environment(api_key) +# def completion( +# model: str, +# messages: list, +# api_base: str, +# model_response: ModelResponse, +# print_verbose: Callable, +# encoding, +# api_key, +# logging_obj, +# custom_prompt_dict={}, +# optional_params=None, +# litellm_params=None, +# logger_fn=None, +# ): +# headers = validate_environment(api_key) - ## Load Config - config = litellm.TogetherAIConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v +# ## Load Config +# config = litellm.TogetherAIConfig.get_config() +# for k, v in config.items(): +# if ( +# k not in optional_params +# ): # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in +# optional_params[k] = v - print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}") - if model in custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details.get("roles", {}), - initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""), - final_prompt_value=model_prompt_details.get("final_prompt_value", ""), - bos_token=model_prompt_details.get("bos_token", ""), - eos_token=model_prompt_details.get("eos_token", ""), - messages=messages, - ) - else: - prompt = prompt_factory( - model=model, - messages=messages, - api_key=api_key, - custom_llm_provider="together_ai", - ) # api key required to query together ai model list +# print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}") +# if model in custom_prompt_dict: +# # 
check if the model has a registered custom prompt +# model_prompt_details = custom_prompt_dict[model] +# prompt = custom_prompt( +# role_dict=model_prompt_details.get("roles", {}), +# initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""), +# final_prompt_value=model_prompt_details.get("final_prompt_value", ""), +# bos_token=model_prompt_details.get("bos_token", ""), +# eos_token=model_prompt_details.get("eos_token", ""), +# messages=messages, +# ) +# else: +# prompt = prompt_factory( +# model=model, +# messages=messages, +# api_key=api_key, +# custom_llm_provider="together_ai", +# ) # api key required to query together ai model list - data = { - "model": model, - "prompt": prompt, - "request_type": "language-model-inference", - **optional_params, - } +# data = { +# "model": model, +# "prompt": prompt, +# "request_type": "language-model-inference", +# **optional_params, +# } - ## LOGGING - logging_obj.pre_call( - input=prompt, - api_key=api_key, - additional_args={ - "complete_input_dict": data, - "headers": headers, - "api_base": api_base, - }, - ) - ## COMPLETION CALL - if "stream_tokens" in optional_params and optional_params["stream_tokens"] == True: - response = requests.post( - api_base, - headers=headers, - data=json.dumps(data), - stream=optional_params["stream_tokens"], - ) - return response.iter_lines() - else: - response = requests.post(api_base, headers=headers, data=json.dumps(data)) - ## LOGGING - logging_obj.post_call( - input=prompt, - api_key=api_key, - original_response=response.text, - additional_args={"complete_input_dict": data}, - ) - print_verbose(f"raw model_response: {response.text}") - ## RESPONSE OBJECT - if response.status_code != 200: - raise TogetherAIError( - status_code=response.status_code, message=response.text - ) - completion_response = response.json() +# ## LOGGING +# logging_obj.pre_call( +# input=prompt, +# api_key=api_key, +# additional_args={ +# "complete_input_dict": data, +# "headers": headers, +# "api_base": api_base, +# }, +# ) +# ## COMPLETION CALL +# if "stream_tokens" in optional_params and optional_params["stream_tokens"] == True: +# response = requests.post( +# api_base, +# headers=headers, +# data=json.dumps(data), +# stream=optional_params["stream_tokens"], +# ) +# return response.iter_lines() +# else: +# response = requests.post(api_base, headers=headers, data=json.dumps(data)) +# ## LOGGING +# logging_obj.post_call( +# input=prompt, +# api_key=api_key, +# original_response=response.text, +# additional_args={"complete_input_dict": data}, +# ) +# print_verbose(f"raw model_response: {response.text}") +# ## RESPONSE OBJECT +# if response.status_code != 200: +# raise TogetherAIError( +# status_code=response.status_code, message=response.text +# ) +# completion_response = response.json() - if "error" in completion_response: - raise TogetherAIError( - message=json.dumps(completion_response), - status_code=response.status_code, - ) - elif "error" in completion_response["output"]: - raise TogetherAIError( - message=json.dumps(completion_response["output"]), - status_code=response.status_code, - ) +# if "error" in completion_response: +# raise TogetherAIError( +# message=json.dumps(completion_response), +# status_code=response.status_code, +# ) +# elif "error" in completion_response["output"]: +# raise TogetherAIError( +# message=json.dumps(completion_response["output"]), +# status_code=response.status_code, +# ) - if len(completion_response["output"]["choices"][0]["text"]) >= 0: - 
model_response["choices"][0]["message"]["content"] = completion_response[ - "output" - ]["choices"][0]["text"] +# if len(completion_response["output"]["choices"][0]["text"]) >= 0: +# model_response.choices[0].message.content = completion_response["output"][ +# "choices" +# ][0]["text"] - ## CALCULATING USAGE - print_verbose( - f"CALCULATING TOGETHERAI TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}" - ) - prompt_tokens = len(encoding.encode(prompt)) - completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"].get("content", "")) - ) - if "finish_reason" in completion_response["output"]["choices"][0]: - model_response.choices[0].finish_reason = completion_response["output"][ - "choices" - ][0]["finish_reason"] - model_response["created"] = int(time.time()) - model_response["model"] = "together_ai/" + model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, - ) - setattr(model_response, "usage", usage) - return model_response +# ## CALCULATING USAGE +# print_verbose( +# f"CALCULATING TOGETHERAI TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}" +# ) +# prompt_tokens = len(encoding.encode(prompt)) +# completion_tokens = len( +# encoding.encode(model_response["choices"][0]["message"].get("content", "")) +# ) +# if "finish_reason" in completion_response["output"]["choices"][0]: +# model_response.choices[0].finish_reason = completion_response["output"][ +# "choices" +# ][0]["finish_reason"] +# model_response["created"] = int(time.time()) +# model_response["model"] = "together_ai/" + model +# usage = Usage( +# prompt_tokens=prompt_tokens, +# completion_tokens=completion_tokens, +# total_tokens=prompt_tokens + completion_tokens, +# ) +# setattr(model_response, "usage", usage) +# return model_response -def embedding(): - # logic for parsing in - calling - parsing out model embedding calls - pass +# def embedding(): +# # logic for parsing in - calling - parsing out model embedding calls +# pass diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index 31fd23202..c891a86ee 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -852,16 +852,14 @@ def completion( ## RESPONSE OBJECT if isinstance(completion_response, litellm.Message): - model_response["choices"][0]["message"] = completion_response + model_response.choices[0].message = completion_response # type: ignore elif len(str(completion_response)) > 0: - model_response["choices"][0]["message"]["content"] = str( - completion_response - ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.choices[0].message.content = str(completion_response) # type: ignore + model_response.created = int(time.time()) + model_response.model = model ## CALCULATING USAGE if model in litellm.vertex_language_models and response_obj is not None: - model_response["choices"][0].finish_reason = map_finish_reason( + model_response.choices[0].finish_reason = map_finish_reason( response_obj.candidates[0].finish_reason.name ) usage = Usage( @@ -912,7 +910,7 @@ async def async_completion( request_str: str, print_verbose: Callable, logging_obj, - encoding=None, + encoding, client_options=None, instances=None, vertex_project=None, @@ -1088,16 +1086,16 @@ 
async def async_completion( ## RESPONSE OBJECT if isinstance(completion_response, litellm.Message): - model_response["choices"][0]["message"] = completion_response + model_response.choices[0].message = completion_response # type: ignore elif len(str(completion_response)) > 0: - model_response["choices"][0]["message"]["content"] = str( + model_response.choices[0].message.content = str( # type: ignore completion_response ) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model ## CALCULATING USAGE if model in litellm.vertex_language_models and response_obj is not None: - model_response["choices"][0].finish_reason = map_finish_reason( + model_response.choices[0].finish_reason = map_finish_reason( response_obj.candidates[0].finish_reason.name ) usage = Usage( @@ -1377,16 +1375,16 @@ class VertexAITextEmbeddingConfig(BaseModel): def embedding( model: str, input: Union[list, str], + print_verbose, + model_response: litellm.EmbeddingResponse, + optional_params: dict, api_key: Optional[str] = None, logging_obj=None, - model_response=None, - optional_params=None, encoding=None, vertex_project=None, vertex_location=None, vertex_credentials=None, aembedding=False, - print_verbose=None, ): # logic for parsing in - calling - parsing out model embedding calls try: @@ -1484,15 +1482,15 @@ def embedding( "embedding": embedding.values, } ) - input_tokens += embedding.statistics.token_count - model_response["object"] = "list" - model_response["data"] = embedding_response - model_response["model"] = model + input_tokens += embedding.statistics.token_count # type: ignore + model_response.object = "list" + model_response.data = embedding_response + model_response.model = model usage = Usage( prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens ) - model_response.usage = usage + setattr(model_response, "usage", usage) return model_response @@ -1500,8 +1498,8 @@ def embedding( async def async_embedding( model: str, input: Union[list, str], + model_response: litellm.EmbeddingResponse, logging_obj=None, - model_response=None, optional_params=None, encoding=None, client=None, @@ -1541,11 +1539,11 @@ async def async_embedding( ) input_tokens += embedding.statistics.token_count - model_response["object"] = "list" - model_response["data"] = embedding_response - model_response["model"] = model + model_response.object = "list" + model_response.data = embedding_response + model_response.model = model usage = Usage( prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens ) - model_response.usage = usage + setattr(model_response, "usage", usage) return model_response diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 44a7a448e..b8362d5a5 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -367,8 +367,8 @@ async def async_completion( prompt_tokens = message.usage.input_tokens completion_tokens = message.usage.output_tokens - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/vllm.py b/litellm/llms/vllm.py index b2a9dd54d..f261b7297 100644 --- a/litellm/llms/vllm.py +++ b/litellm/llms/vllm.py @@ -1,11 +1,15 @@ -import os import json +import os +import time # type: ignore from enum import Enum 
+from typing import Any, Callable + +import httpx import requests # type: ignore -import time, httpx # type: ignore -from typing import Callable, Any + from litellm.utils import ModelResponse, Usage -from .prompt_templates.factory import prompt_factory, custom_prompt + +from .prompt_templates.factory import custom_prompt, prompt_factory llm = None @@ -91,14 +95,14 @@ def completion( ) print_verbose(f"raw model_response: {outputs}") ## RESPONSE OBJECT - model_response["choices"][0]["message"]["content"] = outputs[0].outputs[0].text + model_response.choices[0].message.content = outputs[0].outputs[0].text # type: ignore ## CALCULATING USAGE prompt_tokens = len(outputs[0].prompt_token_ids) completion_tokens = len(outputs[0].outputs[0].token_ids) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -173,14 +177,14 @@ def batch_completions( for output in outputs: model_response = ModelResponse() ## RESPONSE OBJECT - model_response["choices"][0]["message"]["content"] = output.outputs[0].text + model_response.choices[0].message.content = output.outputs[0].text # type: ignore ## CALCULATING USAGE prompt_tokens = len(output.prompt_token_ids) completion_tokens = len(output.outputs[0].token_ids) - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.created = int(time.time()) + model_response.model = model usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, diff --git a/litellm/llms/watsonx.py b/litellm/llms/watsonx.py index 5649b714a..c01efd8ad 100644 --- a/litellm/llms/watsonx.py +++ b/litellm/llms/watsonx.py @@ -25,7 +25,13 @@ import requests # type: ignore import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler -from litellm.utils import ModelResponse, Usage, get_secret +from litellm.utils import ( + EmbeddingResponse, + ModelResponse, + Usage, + get_secret, + map_finish_reason, +) from .base import BaseLLM from .prompt_templates import factory as ptf @@ -414,14 +420,16 @@ class IBMWatsonXAI(BaseLLM): generated_text = json_resp["results"][0]["generated_text"] prompt_tokens = json_resp["results"][0]["input_token_count"] completion_tokens = json_resp["results"][0]["generated_token_count"] - model_response["choices"][0]["message"]["content"] = generated_text - model_response["finish_reason"] = json_resp["results"][0]["stop_reason"] + model_response.choices[0].message.content = generated_text # type: ignore + model_response.choices[0].finish_reason = map_finish_reason( + json_resp["results"][0]["stop_reason"] + ) if json_resp.get("created_at"): - model_response["created"] = datetime.fromisoformat( - json_resp["created_at"] - ).timestamp() + model_response.created = int( + datetime.fromisoformat(json_resp["created_at"]).timestamp() + ) else: - model_response["created"] = int(time.time()) + model_response.created = int(time.time()) usage = Usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -463,7 +471,7 @@ class IBMWatsonXAI(BaseLLM): prompt = convert_messages_to_prompt( model, messages, provider, custom_prompt_dict ) - model_response["model"] = model + model_response.model = model def process_stream_response( stream_resp: Union[Iterator[str], AsyncIterator], @@ -551,10 +559,10 @@ class IBMWatsonXAI(BaseLLM): raise WatsonXAIError(status_code=500, message=str(e)) def _process_embedding_response( - 
self, json_resp: dict, model_response: Union[ModelResponse, None] = None - ) -> ModelResponse: + self, json_resp: dict, model_response: Optional[EmbeddingResponse] = None + ) -> EmbeddingResponse: if model_response is None: - model_response = ModelResponse(model=json_resp.get("model_id", None)) + model_response = EmbeddingResponse(model=json_resp.get("model_id", None)) results = json_resp.get("results", []) embedding_response = [] for idx, result in enumerate(results): @@ -565,8 +573,8 @@ class IBMWatsonXAI(BaseLLM): "embedding": result["embedding"], } ) - model_response["object"] = "list" - model_response["data"] = embedding_response + model_response.object = "list" + model_response.data = embedding_response input_tokens = json_resp.get("input_token_count", 0) setattr( model_response, @@ -583,9 +591,9 @@ class IBMWatsonXAI(BaseLLM): self, model: str, input: Union[list, str], + model_response: litellm.EmbeddingResponse, api_key: Optional[str] = None, logging_obj=None, - model_response=None, optional_params=None, encoding=None, print_verbose=None, @@ -602,7 +610,7 @@ class IBMWatsonXAI(BaseLLM): if k not in optional_params: optional_params[k] = v - model_response["model"] = model + model_response.model = model # Load auth variables from environment variables if isinstance(input, str): @@ -635,12 +643,12 @@ class IBMWatsonXAI(BaseLLM): } request_manager = RequestManager(logging_obj) - def handle_embedding(request_params: dict) -> ModelResponse: + def handle_embedding(request_params: dict) -> EmbeddingResponse: with request_manager.request(request_params, input=input) as resp: json_resp = resp.json() return self._process_embedding_response(json_resp, model_response) - async def handle_aembedding(request_params: dict) -> ModelResponse: + async def handle_aembedding(request_params: dict) -> EmbeddingResponse: async with request_manager.async_request( request_params, input=input ) as resp: diff --git a/litellm/main.py b/litellm/main.py index 43e6ad3fc..0aeff3188 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -38,6 +38,7 @@ import dotenv import httpx import openai import tiktoken +from pydantic import BaseModel from typing_extensions import overload import litellm @@ -48,6 +49,7 @@ from litellm import ( # type: ignore get_litellm_params, get_optional_params, ) +from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.utils import ( CustomStreamWrapper, @@ -520,7 +522,7 @@ def mock_completion( ) return response if n is None: - model_response["choices"][0]["message"]["content"] = mock_response + model_response.choices[0].message.content = mock_response # type: ignore else: _all_choices = [] for i in range(n): @@ -531,12 +533,12 @@ def mock_completion( ), ) _all_choices.append(_choice) - model_response["choices"] = _all_choices - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.choices = _all_choices # type: ignore + model_response.created = int(time.time()) + model_response.model = model if mock_tool_calls: - model_response["choices"][0]["message"]["tool_calls"] = [ + model_response.choices[0].message.tool_calls = [ # type: ignore ChatCompletionMessageToolCall(**tool_call) for tool_call in mock_tool_calls ] @@ -1932,51 +1934,7 @@ def completion( """ Deprecated. 
We now do together ai calls via the openai client - https://docs.together.ai/docs/openai-api-compatibility """ - custom_llm_provider = "together_ai" - together_ai_key = ( - api_key - or litellm.togetherai_api_key - or get_secret("TOGETHER_AI_TOKEN") - or get_secret("TOGETHERAI_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("TOGETHERAI_API_BASE") - or "https://api.together.xyz/inference" - ) - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - - model_response = together_ai.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=together_ai_key, - logging_obj=logging, - custom_prompt_dict=custom_prompt_dict, - ) - if ( - "stream_tokens" in optional_params - and optional_params["stream_tokens"] == True - ): - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="together_ai", - logging_obj=logging, - ) - return response - response = model_response + pass elif custom_llm_provider == "palm": palm_api_key = api_key or get_secret("PALM_API_KEY") or litellm.api_key @@ -2459,10 +2417,10 @@ def completion( ## LOGGING generator = ollama.get_ollama_response( - api_base, - model, - prompt, - optional_params, + api_base=api_base, + model=model, + prompt=prompt, + optional_params=optional_params, logging_obj=logging, acompletion=acompletion, model_response=model_response, @@ -2488,11 +2446,11 @@ def completion( ) ## LOGGING generator = ollama_chat.get_ollama_response( - api_base, - api_key, - model, - messages, - optional_params, + api_base=api_base, + api_key=api_key, + model=model, + messages=messages, + optional_params=optional_params, logging_obj=logging, acompletion=acompletion, model_response=model_response, @@ -2670,9 +2628,9 @@ def completion( """ string_response = response_json["data"][0]["output"][0] ## RESPONSE OBJECT - model_response["choices"][0]["message"]["content"] = string_response - model_response["created"] = int(time.time()) - model_response["model"] = model + model_response.choices[0].message.content = string_response # type: ignore + model_response.created = int(time.time()) + model_response.model = model response = model_response else: raise ValueError( @@ -3463,7 +3421,7 @@ def embedding( or api_base or get_secret("OLLAMA_API_BASE") or "http://localhost:11434" - ) + ) # type: ignore if isinstance(input, str): input = [input] if not all(isinstance(item, str) for item in input): @@ -3473,9 +3431,11 @@ def embedding( llm_provider="ollama", # type: ignore ) ollama_embeddings_fn = ( - ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings + ollama.ollama_aembeddings + if aembedding is True + else ollama.ollama_embeddings ) - response = ollama_embeddings_fn( + response = ollama_embeddings_fn( # type: ignore api_base=api_base, model=model, prompts=input, @@ -3943,6 +3903,63 @@ def text_completion( return text_completion_response +###### Adapter Completion ################ + + +async def aadapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]: + """ + Implemented to handle async calls for adapter_completion() + """ + try: + translation_obj: Optional[CustomLogger] = None + for item in litellm.adapters: + if item["id"] == adapter_id: + translation_obj = item["adapter"] + + if translation_obj is None: + raise ValueError( + 
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format( + adapter_id, litellm.adapters + ) + ) + + new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) + + response: ModelResponse = await acompletion(**new_kwargs) # type: ignore + + translated_response = translation_obj.translate_completion_output_params( + response=response + ) + + return translated_response + except Exception as e: + raise e + + +def adapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]: + translation_obj: Optional[CustomLogger] = None + for item in litellm.adapters: + if item["id"] == adapter_id: + translation_obj = item["adapter"] + + if translation_obj is None: + raise ValueError( + "No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format( + adapter_id, litellm.adapters + ) + ) + + new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) + + response: ModelResponse = completion(**new_kwargs) # type: ignore + + translated_response = translation_obj.translate_completion_output_params( + response=response + ) + + return translated_response + + ##### Moderation ####################### diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html new file mode 100644 index 000000000..fcdc3749f --- /dev/null +++ b/litellm/proxy/_experimental/out/404.html @@ -0,0 +1 @@ +404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/_next/static/LmgW0mreu0hjU2N9CAPDM/_buildManifest.js b/litellm/proxy/_experimental/out/_next/static/aCz2wdplG6aqWrQnod4_6/_buildManifest.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/LmgW0mreu0hjU2N9CAPDM/_buildManifest.js
rename to litellm/proxy/_experimental/out/_next/static/aCz2wdplG6aqWrQnod4_6/_buildManifest.js
diff --git a/litellm/proxy/_experimental/out/_next/static/LmgW0mreu0hjU2N9CAPDM/_ssgManifest.js b/litellm/proxy/_experimental/out/_next/static/aCz2wdplG6aqWrQnod4_6/_ssgManifest.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/LmgW0mreu0hjU2N9CAPDM/_ssgManifest.js
rename to litellm/proxy/_experimental/out/_next/static/aCz2wdplG6aqWrQnod4_6/_ssgManifest.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js b/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js
new file mode 100644
index 000000000..92aaed0dd
--- /dev/null
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/131-19b05e5ce40fa85d.js
@@ -0,0 +1,8 @@
[8 lines of minified Next.js/webpack build output for the proxy dashboard UI omitted]
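The recurring change across the hunks above is the move from dict-style indexing on ModelResponse to attribute access. A minimal before/after sketch of that pattern (illustrative only; the model name and content values are placeholders):

import time

from litellm.utils import ModelResponse

# ModelResponse() creates a single default choice, as the vllm batch_completions hunk relies on.
model_response = ModelResponse()

# Old style, removed across these hunks:
#   model_response["choices"][0]["message"]["content"] = "hello"
#   model_response["created"] = int(time.time())
#   model_response["model"] = "my-model"

# New style, what the hunks switch to:
model_response.choices[0].message.content = "hello"
model_response.created = int(time.time())
model_response.model = "my-model"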