diff --git a/.circleci/config.yml b/.circleci/config.yml index c84f5d941..6bbc95b5a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -416,15 +416,17 @@ jobs: command: | python -m pip install --upgrade pip pip install ruff - pip install pylint + pip install pylint pip install pyright pip install . + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) - run: ruff check ./litellm - run: python ./tests/documentation_tests/test_general_setting_keys.py - run: python ./tests/code_coverage_tests/router_code_coverage.py - run: python ./tests/documentation_tests/test_env_keys.py - + - run: helm lint ./deploy/charts/litellm-helm + db_migration_disable_update_check: machine: image: ubuntu-2204:2023.10.1 @@ -1099,4 +1101,4 @@ workflows: branches: only: - main - \ No newline at end of file + diff --git a/.github/workflows/ghcr_helm_deploy.yml b/.github/workflows/ghcr_helm_deploy.yml index 35ea96bfb..f78dc6f0f 100644 --- a/.github/workflows/ghcr_helm_deploy.yml +++ b/.github/workflows/ghcr_helm_deploy.yml @@ -50,6 +50,9 @@ jobs: current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }} version-fragment: 'bug' + - name: Lint helm chart + run: helm lint deploy/charts/litellm-helm + - uses: ./.github/actions/helm-oci-chart-releaser with: name: litellm-helm @@ -61,4 +64,4 @@ jobs: registry_username: ${{ github.actor }} registry_password: ${{ secrets.GITHUB_TOKEN }} update_dependencies: true - \ No newline at end of file + diff --git a/deploy/charts/litellm-helm/Chart.yaml b/deploy/charts/litellm-helm/Chart.yaml index 594bbd76d..6232a2320 100644 --- a/deploy/charts/litellm-helm/Chart.yaml +++ b/deploy/charts/litellm-helm/Chart.yaml @@ -24,7 +24,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: v1.46.6 +appVersion: v1.50.2 dependencies: - name: "postgresql" diff --git a/deploy/charts/litellm-helm/README.md b/deploy/charts/litellm-helm/README.md index 02fccdf03..8b2196f57 100644 --- a/deploy/charts/litellm-helm/README.md +++ b/deploy/charts/litellm-helm/README.md @@ -28,14 +28,13 @@ If `db.useStackgresOperator` is used (not yet implemented): | `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm` | | `image.pullPolicy` | LiteLLM Proxy image pull policy | `IfNotPresent` | | `image.tag` | Overrides the image tag whose default the latest version of LiteLLM at the time this chart was published. | `""` | -| `image.dbReadyImage` | On Pod startup, an initContainer is used to make sure the Postgres database is available before attempting to start LiteLLM. This field specifies the image to use as that initContainer. | `docker.io/bitnami/postgresql` | -| `image.dbReadyTag` | Tag for the above image. If not specified, "latest" is used. | `""` | | `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` | | `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` | | `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` | | `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` | | `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A | | `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | N/A | +| `extraContainers[]` | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy. | `[]` | #### Example `environmentSecrets` Secret @@ -127,4 +126,4 @@ kubectl -n litellm get secret -litellm-masterkey -o jsonpath="{.data.ma At the time of writing, the Admin UI is unable to add models. This is because it would need to update the `config.yaml` file which is a exposed ConfigMap, and therefore, read-only. This is a limitation of this helm chart, not the Admin UI -itself. \ No newline at end of file +itself. diff --git a/deploy/charts/litellm-helm/templates/deployment.yaml b/deploy/charts/litellm-helm/templates/deployment.yaml index 7e5faac3c..7f4e87653 100644 --- a/deploy/charts/litellm-helm/templates/deployment.yaml +++ b/deploy/charts/litellm-helm/templates/deployment.yaml @@ -31,71 +31,6 @@ spec: serviceAccountName: {{ include "litellm.serviceAccountName" . }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} - initContainers: - - name: db-ready - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.dbReadyImage }}:{{ .Values.image.dbReadyTag | default("16.1.0-debian-11-r20") }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - env: - {{- if .Values.db.deployStandalone }} - - name: DATABASE_USERNAME - valueFrom: - secretKeyRef: - name: {{ include "litellm.fullname" . }}-dbcredentials - key: username - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: {{ include "litellm.fullname" . }}-dbcredentials - key: password - - name: DATABASE_HOST - value: {{ .Release.Name }}-postgresql - - name: DATABASE_NAME - value: litellm - {{- else if .Values.db.useExisting }} - - name: DATABASE_USERNAME - valueFrom: - secretKeyRef: - name: {{ .Values.db.secret.name }} - key: {{ .Values.db.secret.usernameKey }} - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.db.secret.name }} - key: {{ .Values.db.secret.passwordKey }} - - name: DATABASE_HOST - value: {{ .Values.db.endpoint }} - - name: DATABASE_NAME - value: {{ .Values.db.database }} - {{- end }} - command: - - sh - - -c - - | - # Maximum wait time will be (limit * 2) seconds. - limit=60 - current=0 - ret=1 - while [ $current -lt $limit ] && [ $ret -ne 0 ]; do - echo "Waiting for database to be ready $current" - psql -U $(DATABASE_USERNAME) -h $(DATABASE_HOST) -l - ret=$? - current=$(( $current + 1 )) - sleep 2 - done - if [ $ret -eq 0 ]; then - echo "Database is ready" - else - echo "Database failed to become ready before we gave up waiting." - fi - resources: - {{- toYaml .Values.resources | nindent 12 }} - {{ if .Values.securityContext.readOnlyRootFilesystem }} - volumeMounts: - - name: tmp - mountPath: /tmp - {{ end }} containers: - name: {{ include "litellm.name" . }} securityContext: @@ -203,6 +138,9 @@ spec: {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} volumes: {{ if .Values.securityContext.readOnlyRootFilesystem }} - name: tmp @@ -235,4 +173,4 @@ spec: {{- with .Values.tolerations }} tolerations: {{- toYaml . | nindent 8 }} - {{- end }} \ No newline at end of file + {{- end }} diff --git a/deploy/charts/litellm-helm/values.yaml b/deploy/charts/litellm-helm/values.yaml index 0e11c3f61..a2c55f2fa 100644 --- a/deploy/charts/litellm-helm/values.yaml +++ b/deploy/charts/litellm-helm/values.yaml @@ -7,16 +7,11 @@ replicaCount: 1 image: # Use "ghcr.io/berriai/litellm-database" for optimized image with database repository: ghcr.io/berriai/litellm-database - pullPolicy: IfNotPresent + pullPolicy: Always # Overrides the image tag whose default is the chart appVersion. # tag: "main-latest" tag: "" - # Image and tag used for the init container to check and wait for the - # readiness of the postgres database. - dbReadyImage: docker.io/bitnami/postgresql - dbReadyTag: "" - imagePullSecrets: [] nameOverride: "litellm" fullnameOverride: "" diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index a2b592ef8..89b294584 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -1,7 +1,7 @@ import asyncio import os import traceback -from typing import TYPE_CHECKING, Any, Mapping, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union import httpx from httpx import USE_CLIENT_DEFAULT @@ -32,15 +32,20 @@ class AsyncHTTPHandler: def __init__( self, timeout: Optional[Union[float, httpx.Timeout]] = None, + event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None, concurrent_limit=1000, ): self.timeout = timeout + self.event_hooks = event_hooks self.client = self.create_client( - timeout=timeout, concurrent_limit=concurrent_limit + timeout=timeout, concurrent_limit=concurrent_limit, event_hooks=event_hooks ) def create_client( - self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int + self, + timeout: Optional[Union[float, httpx.Timeout]], + concurrent_limit: int, + event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]], ) -> httpx.AsyncClient: # SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts. @@ -55,6 +60,7 @@ class AsyncHTTPHandler: # Create a client with a connection pool return httpx.AsyncClient( + event_hooks=event_hooks, timeout=timeout, limits=httpx.Limits( max_connections=concurrent_limit, @@ -114,7 +120,9 @@ class AsyncHTTPHandler: return response except (httpx.RemoteProtocolError, httpx.ConnectError): # Retry the request with a new session if there is a connection error - new_client = self.create_client(timeout=timeout, concurrent_limit=1) + new_client = self.create_client( + timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks + ) try: return await self.single_connection_post_request( url=url, @@ -172,7 +180,9 @@ class AsyncHTTPHandler: return response except (httpx.RemoteProtocolError, httpx.ConnectError): # Retry the request with a new session if there is a connection error - new_client = self.create_client(timeout=timeout, concurrent_limit=1) + new_client = self.create_client( + timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks + ) try: return await self.single_connection_post_request( url=url, @@ -229,7 +239,9 @@ class AsyncHTTPHandler: return response except (httpx.RemoteProtocolError, httpx.ConnectError): # Retry the request with a new session if there is a connection error - new_client = self.create_client(timeout=timeout, concurrent_limit=1) + new_client = self.create_client( + timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks + ) try: return await self.single_connection_post_request( url=url, diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py index 684e41da8..9c26da614 100644 --- a/tests/local_testing/test_utils.py +++ b/tests/local_testing/test_utils.py @@ -15,6 +15,7 @@ sys.path.insert( import pytest import litellm +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers from litellm.proxy.utils import ( _duration_in_seconds, _extract_from_regex, @@ -830,6 +831,29 @@ def test_is_base64_encoded(): assert is_base64_encoded(s=base64_image) is True +@mock.patch("httpx.AsyncClient") +@mock.patch.dict(os.environ, {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"}, clear=True) +def test_async_http_handler(mock_async_client): + import httpx + + timeout = 120 + event_hooks = {"request": [lambda r: r]} + concurrent_limit = 2 + + AsyncHTTPHandler(timeout, event_hooks, concurrent_limit) + + mock_async_client.assert_called_with( + cert="/client.pem", + event_hooks=event_hooks, + headers=headers, + limits=httpx.Limits( + max_connections=concurrent_limit, + max_keepalive_connections=concurrent_limit, + ), + timeout=timeout, + verify="/certificate.pem", + ) + @pytest.mark.parametrize( "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)] ) @@ -842,3 +866,4 @@ def test_supports_audio_input(model, expected_bool): supports_pc = supports_audio_input(model=model) assert supports_pc == expected_bool +