allow configuring httpx hooks for AsyncHTTPHandler (#6290) (#6415)

* allow configuring httpx hooks for AsyncHTTPHandler (#6290) Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> * Fixes and minor improvements for Helm Chart (#6402) * reckoner hack * fix default * add extracontainers option * revert chart * fix extracontainers * fix deployment * remove init container * update docs * add helm lint to deploy step * change name * (refactor) prometheus async_log_success_event to be under 100 LOC (#6416) * unit testing for prometheus * unit testing for success metrics * use 1 helper for _increment_token_metrics * use helper for _increment_remaining_budget_metrics * use _increment_remaining_budget_metrics * use _increment_top_level_request_and_spend_metrics * use helper for _set_latency_metrics * remove noqa violation * fix test prometheus * test prometheus * unit testing for all prometheus helper functions * fix prom unit tests * fix unit tests prometheus * fix unit test prom * (refactor) router - use static methods for client init utils (#6420) * use InitalizeOpenAISDKClient * use InitalizeOpenAISDKClient static method * fix # noqa: PLR0915 * (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406) * code cleanup remove unused and undocumented code files * fix unused logging integrations cleanup * update chart version * add circleci tests --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> * fix: fix linting error * fix(http_handler.py): fix linting error --------- Co-authored-by: Alejandro Rodríguez <alejorro70@gmail.com> Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2024-10-24 22:00:24 -07:00 · 2024-10-24 22:00:24 -07:00 · cc8dd80209
commit cc8dd80209
parent 1cd1d23fdf
8 changed files with 60 additions and 86 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -416,15 +416,17 @@ jobs:
          command: |
            python -m pip install --upgrade pip
            pip install ruff
-            pip install pylint  
+            pip install pylint
            pip install pyright
            pip install .
            curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
      - run: ruff check ./litellm
      - run: python ./tests/documentation_tests/test_general_setting_keys.py
      - run: python ./tests/code_coverage_tests/router_code_coverage.py
      - run: python ./tests/documentation_tests/test_env_keys.py
-    
+      - run: helm lint ./deploy/charts/litellm-helm
  db_migration_disable_update_check:
    machine:
      image: ubuntu-2204:2023.10.1
@ -1099,4 +1101,4 @@ workflows:
            branches:
              only:
                - main
-      
+      
--- a/.github/workflows/ghcr_helm_deploy.yml
+++ b/.github/workflows/ghcr_helm_deploy.yml
@ -50,6 +50,9 @@ jobs:
          current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
          version-fragment: 'bug'
      - name: Lint helm chart
        run: helm lint deploy/charts/litellm-helm
      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: litellm-helm
@ -61,4 +64,4 @@ jobs:
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true
-  
+  
--- a/deploy/charts/litellm-helm/Chart.yaml
+++ b/deploy/charts/litellm-helm/Chart.yaml
@ -24,7 +24,7 @@ version: 0.3.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: v1.46.6
+appVersion: v1.50.2
 dependencies:
  - name: "postgresql"
--- a/deploy/charts/litellm-helm/README.md
+++ b/deploy/charts/litellm-helm/README.md
@ -28,14 +28,13 @@ If `db.useStackgresOperator` is used (not yet implemented):
 | `image.repository`                                         | LiteLLM Proxy image repository                                                                                                                                                        | `ghcr.io/berriai/litellm`  |
 | `image.pullPolicy`                                         | LiteLLM Proxy image pull policy                                                                                                                                                       | `IfNotPresent`  |
 | `image.tag`                                                | Overrides the image tag whose default is the latest version of LiteLLM at the time this chart was published.                                                                          | `""`  |
 | `image.dbReadyImage`                                       | On Pod startup, an initContainer is used to make sure the Postgres database is available before attempting to start LiteLLM.  This field specifies the image to use as that initContainer.  | `docker.io/bitnami/postgresql`  |
 | `image.dbReadyTag`                                         | Tag for the above image.  If not specified, "latest" is used.                                                                                                                         | `""`  |
 | `imagePullSecrets`                                         | Registry credentials for the LiteLLM and initContainer images.                                                                                                                        | `[]`  |
 | `serviceAccount.create`                                    | Whether or not to create a Kubernetes Service Account for this deployment.  The default is `false` because LiteLLM has no need to access the Kubernetes API.                          | `false`  |
 | `service.type`                                             | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.)                                                                                                                      | `ClusterIP`  |
 | `service.port`                                             | TCP port that the Kubernetes Service will listen on.  Also the TCP port within the Pod that the proxy will listen on.                                                                 | `4000`  |
 | `ingress.*`                                                | See [values.yaml](./values.yaml) for example settings                                                                                                                                 | N/A  |
 | `proxy_config.*`                                           | See [values.yaml](./values.yaml) for default settings.  See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples.                            | N/A  |
 | `extraContainers[]`                                        | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy.                                                                                             | `[]`  |
 #### Example `environmentSecrets` Secret 
@ -127,4 +126,4 @@ kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.ma
 At the time of writing, the Admin UI is unable to add models.  This is because
 it would need to update the `config.yaml` file which is an exposed ConfigMap, and
 therefore, read-only.  This is a limitation of this helm chart, not the Admin UI
-itself.
+itself.
--- a/deploy/charts/litellm-helm/templates/deployment.yaml
+++ b/deploy/charts/litellm-helm/templates/deployment.yaml
@ -31,71 +31,6 @@ spec:
      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      initContainers:
        - name: db-ready
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.dbReadyImage }}:{{ .Values.image.dbReadyTag | default("16.1.0-debian-11-r20") }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          env:
            {{- if .Values.db.deployStandalone }}
            - name: DATABASE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: {{ include "litellm.fullname" . }}-dbcredentials
                  key: username
            - name: PGPASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "litellm.fullname" . }}-dbcredentials
                  key: password
            - name: DATABASE_HOST
              value: {{ .Release.Name }}-postgresql
            - name: DATABASE_NAME
              value: litellm
            {{- else if .Values.db.useExisting }}
            - name: DATABASE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.usernameKey }}
            - name: PGPASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.passwordKey }}
            - name: DATABASE_HOST
              value: {{ .Values.db.endpoint }}
            - name: DATABASE_NAME
              value: {{ .Values.db.database }}
            {{- end }}
          command:
            - sh
            - -c
            - |
              # Maximum wait time will be (limit * 2) seconds.
              limit=60
              current=0
              ret=1
              while [ $current -lt $limit ] && [ $ret -ne 0 ]; do
                echo "Waiting for database to be ready $current"
                psql -U $(DATABASE_USERNAME) -h $(DATABASE_HOST) -l
                ret=$?
                current=$(( $current + 1 ))
                sleep 2
              done
              if [ $ret -eq 0 ]; then
                echo "Database is ready"
              else
                echo "Database failed to become ready before we gave up waiting."
              fi
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{ if .Values.securityContext.readOnlyRootFilesystem }}
          volumeMounts:
            - name: tmp
              mountPath: /tmp
          {{ end }}
      containers:
        - name: {{ include "litellm.name" . }}
          securityContext:
@ -203,6 +138,9 @@ spec:
          {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.extraContainers }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
        {{ if .Values.securityContext.readOnlyRootFilesystem }}
        - name: tmp
@ -235,4 +173,4 @@ spec:
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
-      {{- end }}
+      {{- end }}
--- a/deploy/charts/litellm-helm/values.yaml
+++ b/deploy/charts/litellm-helm/values.yaml
@ -7,16 +7,11 @@ replicaCount: 1
 image:
  # Use "ghcr.io/berriai/litellm-database" for optimized image with database
  repository: ghcr.io/berriai/litellm-database
-  pullPolicy: IfNotPresent
+  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "main-latest"
  tag: ""
  # Image and tag used for the init container to check and wait for the
  #  readiness of the postgres database.
  dbReadyImage: docker.io/bitnami/postgresql
  dbReadyTag: ""
 imagePullSecrets: []
 nameOverride: "litellm"
 fullnameOverride: ""
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@ -1,7 +1,7 @@
 import asyncio
 import os
 import traceback
-from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
 import httpx
 from httpx import USE_CLIENT_DEFAULT
@ -32,15 +32,20 @@ class AsyncHTTPHandler:
    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
        concurrent_limit=1000,
    ):
        self.timeout = timeout
        self.event_hooks = event_hooks
        self.client = self.create_client(
-            timeout=timeout, concurrent_limit=concurrent_limit
+            timeout=timeout, concurrent_limit=concurrent_limit, event_hooks=event_hooks
        )
    def create_client(
-        self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
+        self,
        timeout: Optional[Union[float, httpx.Timeout]],
        concurrent_limit: int,
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
    ) -> httpx.AsyncClient:
        # SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
@ -55,6 +60,7 @@ class AsyncHTTPHandler:
        # Create a client with a connection pool
        return httpx.AsyncClient(
            event_hooks=event_hooks,
            timeout=timeout,
            limits=httpx.Limits(
                max_connections=concurrent_limit,
@ -114,7 +120,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
            )
            try:
                return await self.single_connection_post_request(
                    url=url,
@ -172,7 +180,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
            )
            try:
                return await self.single_connection_post_request(
                    url=url,
@ -229,7 +239,9 @@ class AsyncHTTPHandler:
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error
-            new_client = self.create_client(timeout=timeout, concurrent_limit=1)
+            new_client = self.create_client(
                timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
            )
            try:
                return await self.single_connection_post_request(
                    url=url,
--- a/tests/local_testing/test_utils.py
+++ b/tests/local_testing/test_utils.py
@ -15,6 +15,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers
 from litellm.proxy.utils import (
    _duration_in_seconds,
    _extract_from_regex,
@ -830,6 +831,29 @@ def test_is_base64_encoded():
    assert is_base64_encoded(s=base64_image) is True
@mock.patch("httpx.AsyncClient")
@mock.patch.dict(os.environ, {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"}, clear=True)
 def test_async_http_handler(mock_async_client):
    """Check the arguments AsyncHTTPHandler passes to httpx.AsyncClient.

    httpx.AsyncClient is replaced by a mock and os.environ is reduced to
    only SSL_VERIFY and SSL_CERTIFICATE (clear=True). The handler is built
    with an explicit timeout, event hooks and concurrency limit, and the
    test asserts that the mocked client was called with those values plus
    the verify/cert paths set in the patched environment.
    """
    import httpx
    timeout = 120
    # A single pass-through "request" hook; the assertion below only checks
    # that this same mapping is handed to the client.
    event_hooks = {"request": [lambda r: r]}
    concurrent_limit = 2
    # Arguments are passed positionally: (timeout, event_hooks, concurrent_limit).
    AsyncHTTPHandler(timeout, event_hooks, concurrent_limit)
    # The concurrency limit is expected for both max_connections and
    # max_keepalive_connections; cert/verify are expected to equal the
    # SSL_CERTIFICATE / SSL_VERIFY values patched into the environment.
    mock_async_client.assert_called_with(
        cert="/client.pem",
        event_hooks=event_hooks,
        headers=headers,
        limits=httpx.Limits(
            max_connections=concurrent_limit,
            max_keepalive_connections=concurrent_limit,
        ),
        timeout=timeout,
        verify="/certificate.pem",
    )
@pytest.mark.parametrize(
    "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
@ -842,3 +866,4 @@ def test_supports_audio_input(model, expected_bool):
    supports_pc = supports_audio_input(model=model)
    assert supports_pc == expected_bool