allow configuring httpx hooks for AsyncHTTPHandler (#6290) (#6415)

* allow configuring httpx hooks for AsyncHTTPHandler (#6290) Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> * Fixes and minor improvements for Helm Chart (#6402) * reckoner hack * fix default * add extracontainers option * revert chart * fix extracontainers * fix deployment * remove init container * update docs * add helm lint to deploy step * change name * (refactor) prometheus async_log_success_event to be under 100 LOC (#6416) * unit testing for prometheus * unit testing for success metrics * use 1 helper for _increment_token_metrics * use helper for _increment_remaining_budget_metrics * use _increment_remaining_budget_metrics * use _increment_top_level_request_and_spend_metrics * use helper for _set_latency_metrics * remove noqa violation * fix test prometheus * test prometheus * unit testing for all prometheus helper functions * fix prom unit tests * fix unit tests prometheus * fix unit test prom * (refactor) router - use static methods for client init utils (#6420) * use InitalizeOpenAISDKClient * use InitalizeOpenAISDKClient static method * fix # noqa: PLR0915 * (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406) * code cleanup remove unused and undocumented code files * fix unused logging integrations cleanup * update chart version * add circleci tests --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> * fix: fix linting error * fix(http_handler.py): fix linting error --------- Co-authored-by: Alejandro Rodríguez <alejorro70@gmail.com> Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2024-10-24 22:00:24 -07:00 · 2024-10-24 22:00:24 -07:00 · cc8dd80209
commit cc8dd80209
parent 1cd1d23fdf
8 changed files with 60 additions and 86 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -416,15 +416,17 @@ jobs:
          command: |
            python -m pip install --upgrade pip
            pip install ruff
-            pip install pylint  
+            pip install pylint
            pip install pyright
            pip install .
+            curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
      - run: ruff check ./litellm
      - run: python ./tests/documentation_tests/test_general_setting_keys.py
      - run: python ./tests/code_coverage_tests/router_code_coverage.py
      - run: python ./tests/documentation_tests/test_env_keys.py
-    
+      - run: helm lint ./deploy/charts/litellm-helm
+
  db_migration_disable_update_check:
    machine:
      image: ubuntu-2204:2023.10.1
@ -1099,4 +1101,4 @@ workflows:
            branches:
              only:
                - main
-      
+      
--- a/.github/workflows/ghcr_helm_deploy.yml
+++ b/.github/workflows/ghcr_helm_deploy.yml
@ -50,6 +50,9 @@ jobs:
          current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
          version-fragment: 'bug'

+      - name: Lint helm chart
+        run: helm lint deploy/charts/litellm-helm
+
      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: litellm-helm
@ -61,4 +64,4 @@ jobs:
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true
-  
+  
--- a/deploy/charts/litellm-helm/Chart.yaml
+++ b/deploy/charts/litellm-helm/Chart.yaml
@ -24,7 +24,7 @@ version: 0.3.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: v1.46.6
+appVersion: v1.50.2

 dependencies:
  - name: "postgresql"
--- a/deploy/charts/litellm-helm/README.md
+++ b/deploy/charts/litellm-helm/README.md
@ -28,14 +28,13 @@ If `db.useStackgresOperator` is used (not yet implemented):
 | `image.repository`                                         | LiteLLM Proxy image repository                                                                                                                                                        | `ghcr.io/berriai/litellm`  |
 | `image.pullPolicy`                                         | LiteLLM Proxy image pull policy                                                                                                                                                       | `Always`  |
 | `image.tag`                                                | Overrides the image tag whose default is the latest version of LiteLLM at the time this chart was published.                                                                          | `""`  |
-| `image.dbReadyImage`                                       | On Pod startup, an initContainer is used to make sure the Postgres database is available before attempting to start LiteLLM.  This field specifies the image to use as that initContainer.  | `docker.io/bitnami/postgresql`  |
-| `image.dbReadyTag`                                         | Tag for the above image.  If not specified, "latest" is used.                                                                                                                         | `""`  |
 | `imagePullSecrets`                                         | Registry credentials for pulling the LiteLLM image and any other container images used by the Pod.                                                                                    | `[]`  |
 | `serviceAccount.create`                                    | Whether or not to create a Kubernetes Service Account for this deployment.  The default is `false` because LiteLLM has no need to access the Kubernetes API.                          | `false`  |
 | `service.type`                                             | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.)                                                                                                                      | `ClusterIP`  |
 | `service.port`                                             | TCP port that the Kubernetes Service will listen on.  Also the TCP port within the Pod that the proxy will listen on.                                                                 | `4000`  |
 | `ingress.*`                                                | See [values.yaml](./values.yaml) for example settings                                                                                                                                 | N/A  |
 | `proxy_config.*`                                           | See [values.yaml](./values.yaml) for default settings.  See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples.                            | N/A  |
+| `extraContainers[]`                                        | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy.                                                                                             | `[]`  |

 #### Example `environmentSecrets` Secret 

@ -127,4 +126,4 @@ kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.ma
 At the time of writing, the Admin UI is unable to add models.  This is because
 it would need to update the `config.yaml` file, which is an exposed ConfigMap, and
 therefore, read-only.  This is a limitation of this helm chart, not the Admin UI
-itself.
+itself.
--- a/deploy/charts/litellm-helm/templates/deployment.yaml
+++ b/deploy/charts/litellm-helm/templates/deployment.yaml
@ -31,71 +31,6 @@ spec:
      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      initContainers:
-        - name: db-ready
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: "{{ .Values.image.dbReadyImage }}:{{ .Values.image.dbReadyTag | default("16.1.0-debian-11-r20") }}"
-          imagePullPolicy: {{ .Values.image.pullPolicy }}
-          env:
-            {{- if .Values.db.deployStandalone }}
-            - name: DATABASE_USERNAME
-              valueFrom:
-                secretKeyRef:
-                  name: {{ include "litellm.fullname" . }}-dbcredentials
-                  key: username
-            - name: PGPASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: {{ include "litellm.fullname" . }}-dbcredentials
-                  key: password
-            - name: DATABASE_HOST
-              value: {{ .Release.Name }}-postgresql
-            - name: DATABASE_NAME
-              value: litellm
-            {{- else if .Values.db.useExisting }}
-            - name: DATABASE_USERNAME
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Values.db.secret.name }}
-                  key: {{ .Values.db.secret.usernameKey }}
-            - name: PGPASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Values.db.secret.name }}
-                  key: {{ .Values.db.secret.passwordKey }}
-            - name: DATABASE_HOST
-              value: {{ .Values.db.endpoint }}
-            - name: DATABASE_NAME
-              value: {{ .Values.db.database }}
-            {{- end }}
-          command:
-            - sh
-            - -c
-            - |
-              # Maximum wait time will be (limit * 2) seconds.
-              limit=60
-              current=0
-              ret=1
-              while [ $current -lt $limit ] && [ $ret -ne 0 ]; do
-                echo "Waiting for database to be ready $current"
-                psql -U $(DATABASE_USERNAME) -h $(DATABASE_HOST) -l
-                ret=$?
-                current=$(( $current + 1 ))
-                sleep 2
-              done
-              if [ $ret -eq 0 ]; then
-                echo "Database is ready"
-              else
-                echo "Database failed to become ready before we gave up waiting."
-              fi
-          resources:
-            {{- toYaml .Values.resources | nindent 12 }}
-          {{ if .Values.securityContext.readOnlyRootFilesystem }}
-          volumeMounts:
-            - name: tmp
-              mountPath: /tmp
-          {{ end }}
      containers:
        - name: {{ include "litellm.name" . }}
          securityContext:
@ -203,6 +138,9 @@ spec:
          {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
          {{- end }}
+      {{- with .Values.extraContainers }}
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
      volumes:
        {{ if .Values.securityContext.readOnlyRootFilesystem }}
        - name: tmp
@ -235,4 +173,4 @@ spec:
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
-      {{- end }}
+      {{- end }}
--- a/deploy/charts/litellm-helm/values.yaml
+++ b/deploy/charts/litellm-helm/values.yaml
@ -7,16 +7,11 @@ replicaCount: 1
 image:
  # Use "ghcr.io/berriai/litellm-database" for optimized image with database
  repository: ghcr.io/berriai/litellm-database
-  pullPolicy: IfNotPresent
+  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "main-latest"
  tag: ""

-  # Image and tag used for the init container to check and wait for the
-  #  readiness of the postgres database.
-  dbReadyImage: docker.io/bitnami/postgresql
-  dbReadyTag: ""
-
 imagePullSecrets: []
 nameOverride: "litellm"
 fullnameOverride: ""
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@ -1,7 +1,7 @@
 import asyncio
 import os
 import traceback
-from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union

 import httpx
 from httpx import USE_CLIENT_DEFAULT
@ -32,15 +32,20 @@ class AsyncHTTPHandler:
    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
        concurrent_limit=1000,
    ):
        self.timeout = timeout
+        self.event_hooks = event_hooks
        self.client = self.create_client(
-            timeout=timeout, concurrent_limit=concurrent_limit
+            timeout=timeout, concurrent_limit=concurrent_limit, event_hooks=event_hooks
        )

    def create_client(
-        self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
+        self,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        concurrent_limit: int,
+        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
    ) -> httpx.AsyncClient:

        # SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
@ -55,6 +60,7 @@ class AsyncHTTPHandler:
        # Create a client with a connection pool

        return httpx.AsyncClient(
+            event_hooks=event_hooks,
            timeout=timeout,
            limits=httpx.Limits(
                max_connections=concurrent_limit,
@ -114,7 +120,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
+                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
+            )
            try:
                return await self.single_connection_post_request(
                    url=url,
@ -172,7 +180,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
+                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
+            )
            try:
                return await self.single_connection_post_request(
                    url=url,
@ -229,7 +239,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
+                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
+            )
            try:
                return await self.single_connection_post_request(
                    url=url,
--- a/tests/local_testing/test_utils.py
+++ b/tests/local_testing/test_utils.py
@ -15,6 +15,7 @@ sys.path.insert(
 import pytest

 import litellm
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers
 from litellm.proxy.utils import (
    _duration_in_seconds,
    _extract_from_regex,
@ -830,6 +831,29 @@ def test_is_base64_encoded():
    assert is_base64_encoded(s=base64_image) is True


+@mock.patch("httpx.AsyncClient")
+@mock.patch.dict(os.environ, {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"}, clear=True)
+def test_async_http_handler(mock_async_client):
+    import httpx
+
+    timeout = 120
+    event_hooks = {"request": [lambda r: r]}
+    concurrent_limit = 2
+
+    AsyncHTTPHandler(timeout, event_hooks, concurrent_limit)
+
+    mock_async_client.assert_called_with(
+        cert="/client.pem",
+        event_hooks=event_hooks,
+        headers=headers,
+        limits=httpx.Limits(
+            max_connections=concurrent_limit,
+            max_keepalive_connections=concurrent_limit,
+        ),
+        timeout=timeout,
+        verify="/certificate.pem",
+    )
+
@pytest.mark.parametrize(
    "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
@ -842,3 +866,4 @@ def test_supports_audio_input(model, expected_bool):
    supports_pc = supports_audio_input(model=model)

    assert supports_pc == expected_bool
+