forked from phoenix/litellm-mirror
* allow configuring httpx hooks for AsyncHTTPHandler (#6290) Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> * Fixes and minor improvements for Helm Chart (#6402) * reckoner hack * fix default * add extracontainers option * revert chart * fix extracontainers * fix deployment * remove init container * update docs * add helm lint to deploy step * change name * (refactor) prometheus async_log_success_event to be under 100 LOC (#6416) * unit testig for prometheus * unit testing for success metrics * use 1 helper for _increment_token_metrics * use helper for _increment_remaining_budget_metrics * use _increment_remaining_budget_metrics * use _increment_top_level_request_and_spend_metrics * use helper for _set_latency_metrics * remove noqa violation * fix test prometheus * test prometheus * unit testing for all prometheus helper functions * fix prom unit tests * fix unit tests prometheus * fix unit test prom * (refactor) router - use static methods for client init utils (#6420) * use InitalizeOpenAISDKClient * use InitalizeOpenAISDKClient static method * fix # noqa: PLR0915 * (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406) * code cleanup remove unused and undocumented code files * fix unused logging integrations cleanup * update chart version * add circleci tests --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> * fix: fix linting error * fix(http_handler.py): fix linting error --------- Co-authored-by: Alejandro Rodríguez <alejorro70@gmail.com> Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
This commit is contained in:
parent
1cd1d23fdf
commit
cc8dd80209
8 changed files with 60 additions and 86 deletions
|
@ -416,15 +416,17 @@ jobs:
|
||||||
command: |
|
command: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install ruff
|
pip install ruff
|
||||||
pip install pylint
|
pip install pylint
|
||||||
pip install pyright
|
pip install pyright
|
||||||
pip install .
|
pip install .
|
||||||
|
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||||
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
||||||
- run: ruff check ./litellm
|
- run: ruff check ./litellm
|
||||||
- run: python ./tests/documentation_tests/test_general_setting_keys.py
|
- run: python ./tests/documentation_tests/test_general_setting_keys.py
|
||||||
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
||||||
- run: python ./tests/documentation_tests/test_env_keys.py
|
- run: python ./tests/documentation_tests/test_env_keys.py
|
||||||
|
- run: helm lint ./deploy/charts/litellm-helm
|
||||||
|
|
||||||
db_migration_disable_update_check:
|
db_migration_disable_update_check:
|
||||||
machine:
|
machine:
|
||||||
image: ubuntu-2204:2023.10.1
|
image: ubuntu-2204:2023.10.1
|
||||||
|
@ -1099,4 +1101,4 @@ workflows:
|
||||||
branches:
|
branches:
|
||||||
only:
|
only:
|
||||||
- main
|
- main
|
||||||
|
|
||||||
|
|
5
.github/workflows/ghcr_helm_deploy.yml
vendored
5
.github/workflows/ghcr_helm_deploy.yml
vendored
|
@ -50,6 +50,9 @@ jobs:
|
||||||
current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
|
current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
|
||||||
version-fragment: 'bug'
|
version-fragment: 'bug'
|
||||||
|
|
||||||
|
- name: Lint helm chart
|
||||||
|
run: helm lint deploy/charts/litellm-helm
|
||||||
|
|
||||||
- uses: ./.github/actions/helm-oci-chart-releaser
|
- uses: ./.github/actions/helm-oci-chart-releaser
|
||||||
with:
|
with:
|
||||||
name: litellm-helm
|
name: litellm-helm
|
||||||
|
@ -61,4 +64,4 @@ jobs:
|
||||||
registry_username: ${{ github.actor }}
|
registry_username: ${{ github.actor }}
|
||||||
registry_password: ${{ secrets.GITHUB_TOKEN }}
|
registry_password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
update_dependencies: true
|
update_dependencies: true
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,7 @@ version: 0.3.0
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
# It is recommended to use it with quotes.
|
# It is recommended to use it with quotes.
|
||||||
appVersion: v1.46.6
|
appVersion: v1.50.2
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: "postgresql"
|
- name: "postgresql"
|
||||||
|
|
|
@ -28,14 +28,13 @@ If `db.useStackgresOperator` is used (not yet implemented):
|
||||||
| `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm` |
|
| `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm` |
|
||||||
| `image.pullPolicy` | LiteLLM Proxy image pull policy | `IfNotPresent` |
|
| `image.pullPolicy` | LiteLLM Proxy image pull policy | `IfNotPresent` |
|
||||||
| `image.tag` | Overrides the image tag whose default is the latest version of LiteLLM at the time this chart was published. | `""` |
|
| `image.tag` | Overrides the image tag whose default is the latest version of LiteLLM at the time this chart was published. | `""` |
|
||||||
| `image.dbReadyImage` | On Pod startup, an initContainer is used to make sure the Postgres database is available before attempting to start LiteLLM. This field specifies the image to use as that initContainer. | `docker.io/bitnami/postgresql` |
|
|
||||||
| `image.dbReadyTag` | Tag for the above image. If not specified, "latest" is used. | `""` |
|
|
||||||
| `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` |
|
| `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` |
|
||||||
| `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` |
|
| `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` |
|
||||||
| `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
|
| `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
|
||||||
| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` |
|
| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` |
|
||||||
| `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
|
| `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
|
||||||
| `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | N/A |
|
| `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | N/A |
|
||||||
|
| `extraContainers[]` | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy. | `[]` |
|
||||||
|
|
||||||
#### Example `environmentSecrets` Secret
|
#### Example `environmentSecrets` Secret
|
||||||
|
|
||||||
|
@ -127,4 +126,4 @@ kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.ma
|
||||||
At the time of writing, the Admin UI is unable to add models. This is because
|
At the time of writing, the Admin UI is unable to add models. This is because
|
||||||
it would need to update the `config.yaml` file which is an exposed ConfigMap, and
|
it would need to update the `config.yaml` file which is an exposed ConfigMap, and
|
||||||
therefore, read-only. This is a limitation of this helm chart, not the Admin UI
|
therefore, read-only. This is a limitation of this helm chart, not the Admin UI
|
||||||
itself.
|
itself.
|
||||||
|
|
|
@ -31,71 +31,6 @@ spec:
|
||||||
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
|
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
|
||||||
securityContext:
|
securityContext:
|
||||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||||
initContainers:
|
|
||||||
- name: db-ready
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
||||||
image: "{{ .Values.image.dbReadyImage }}:{{ .Values.image.dbReadyTag | default("16.1.0-debian-11-r20") }}"
|
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
||||||
env:
|
|
||||||
{{- if .Values.db.deployStandalone }}
|
|
||||||
- name: DATABASE_USERNAME
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
|
||||||
key: username
|
|
||||||
- name: PGPASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
|
||||||
key: password
|
|
||||||
- name: DATABASE_HOST
|
|
||||||
value: {{ .Release.Name }}-postgresql
|
|
||||||
- name: DATABASE_NAME
|
|
||||||
value: litellm
|
|
||||||
{{- else if .Values.db.useExisting }}
|
|
||||||
- name: DATABASE_USERNAME
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ .Values.db.secret.name }}
|
|
||||||
key: {{ .Values.db.secret.usernameKey }}
|
|
||||||
- name: PGPASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ .Values.db.secret.name }}
|
|
||||||
key: {{ .Values.db.secret.passwordKey }}
|
|
||||||
- name: DATABASE_HOST
|
|
||||||
value: {{ .Values.db.endpoint }}
|
|
||||||
- name: DATABASE_NAME
|
|
||||||
value: {{ .Values.db.database }}
|
|
||||||
{{- end }}
|
|
||||||
command:
|
|
||||||
- sh
|
|
||||||
- -c
|
|
||||||
- |
|
|
||||||
# Maximum wait time will be (limit * 2) seconds.
|
|
||||||
limit=60
|
|
||||||
current=0
|
|
||||||
ret=1
|
|
||||||
while [ $current -lt $limit ] && [ $ret -ne 0 ]; do
|
|
||||||
echo "Waiting for database to be ready $current"
|
|
||||||
psql -U $(DATABASE_USERNAME) -h $(DATABASE_HOST) -l
|
|
||||||
ret=$?
|
|
||||||
current=$(( $current + 1 ))
|
|
||||||
sleep 2
|
|
||||||
done
|
|
||||||
if [ $ret -eq 0 ]; then
|
|
||||||
echo "Database is ready"
|
|
||||||
else
|
|
||||||
echo "Database failed to become ready before we gave up waiting."
|
|
||||||
fi
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.resources | nindent 12 }}
|
|
||||||
{{ if .Values.securityContext.readOnlyRootFilesystem }}
|
|
||||||
volumeMounts:
|
|
||||||
- name: tmp
|
|
||||||
mountPath: /tmp
|
|
||||||
{{ end }}
|
|
||||||
containers:
|
containers:
|
||||||
- name: {{ include "litellm.name" . }}
|
- name: {{ include "litellm.name" . }}
|
||||||
securityContext:
|
securityContext:
|
||||||
|
@ -203,6 +138,9 @@ spec:
|
||||||
{{- with .Values.volumeMounts }}
|
{{- with .Values.volumeMounts }}
|
||||||
{{- toYaml . | nindent 12 }}
|
{{- toYaml . | nindent 12 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- with .Values.extraContainers }}
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
volumes:
|
volumes:
|
||||||
{{ if .Values.securityContext.readOnlyRootFilesystem }}
|
{{ if .Values.securityContext.readOnlyRootFilesystem }}
|
||||||
- name: tmp
|
- name: tmp
|
||||||
|
@ -235,4 +173,4 @@ spec:
|
||||||
{{- with .Values.tolerations }}
|
{{- with .Values.tolerations }}
|
||||||
tolerations:
|
tolerations:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
|
@ -7,16 +7,11 @@ replicaCount: 1
|
||||||
image:
|
image:
|
||||||
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
|
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
|
||||||
repository: ghcr.io/berriai/litellm-database
|
repository: ghcr.io/berriai/litellm-database
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: Always
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
# Overrides the image tag whose default is the chart appVersion.
|
||||||
# tag: "main-latest"
|
# tag: "main-latest"
|
||||||
tag: ""
|
tag: ""
|
||||||
|
|
||||||
# Image and tag used for the init container to check and wait for the
|
|
||||||
# readiness of the postgres database.
|
|
||||||
dbReadyImage: docker.io/bitnami/postgresql
|
|
||||||
dbReadyTag: ""
|
|
||||||
|
|
||||||
imagePullSecrets: []
|
imagePullSecrets: []
|
||||||
nameOverride: "litellm"
|
nameOverride: "litellm"
|
||||||
fullnameOverride: ""
|
fullnameOverride: ""
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
|
from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from httpx import USE_CLIENT_DEFAULT
|
from httpx import USE_CLIENT_DEFAULT
|
||||||
|
@ -32,15 +32,20 @@ class AsyncHTTPHandler:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||||
|
event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
|
||||||
concurrent_limit=1000,
|
concurrent_limit=1000,
|
||||||
):
|
):
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
self.event_hooks = event_hooks
|
||||||
self.client = self.create_client(
|
self.client = self.create_client(
|
||||||
timeout=timeout, concurrent_limit=concurrent_limit
|
timeout=timeout, concurrent_limit=concurrent_limit, event_hooks=event_hooks
|
||||||
)
|
)
|
||||||
|
|
||||||
def create_client(
|
def create_client(
|
||||||
self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
|
self,
|
||||||
|
timeout: Optional[Union[float, httpx.Timeout]],
|
||||||
|
concurrent_limit: int,
|
||||||
|
event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
|
||||||
) -> httpx.AsyncClient:
|
) -> httpx.AsyncClient:
|
||||||
|
|
||||||
# SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
|
# SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
|
||||||
|
@ -55,6 +60,7 @@ class AsyncHTTPHandler:
|
||||||
# Create a client with a connection pool
|
# Create a client with a connection pool
|
||||||
|
|
||||||
return httpx.AsyncClient(
|
return httpx.AsyncClient(
|
||||||
|
event_hooks=event_hooks,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
limits=httpx.Limits(
|
limits=httpx.Limits(
|
||||||
max_connections=concurrent_limit,
|
max_connections=concurrent_limit,
|
||||||
|
@ -114,7 +120,9 @@ class AsyncHTTPHandler:
|
||||||
return response
|
return response
|
||||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||||
# Retry the request with a new session if there is a connection error
|
# Retry the request with a new session if there is a connection error
|
||||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
new_client = self.create_client(
|
||||||
|
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
return await self.single_connection_post_request(
|
return await self.single_connection_post_request(
|
||||||
url=url,
|
url=url,
|
||||||
|
@ -172,7 +180,9 @@ class AsyncHTTPHandler:
|
||||||
return response
|
return response
|
||||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||||
# Retry the request with a new session if there is a connection error
|
# Retry the request with a new session if there is a connection error
|
||||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
new_client = self.create_client(
|
||||||
|
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
return await self.single_connection_post_request(
|
return await self.single_connection_post_request(
|
||||||
url=url,
|
url=url,
|
||||||
|
@ -229,7 +239,9 @@ class AsyncHTTPHandler:
|
||||||
return response
|
return response
|
||||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||||
# Retry the request with a new session if there is a connection error
|
# Retry the request with a new session if there is a connection error
|
||||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
new_client = self.create_client(
|
||||||
|
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
return await self.single_connection_post_request(
|
return await self.single_connection_post_request(
|
||||||
url=url,
|
url=url,
|
||||||
|
|
|
@ -15,6 +15,7 @@ sys.path.insert(
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers
|
||||||
from litellm.proxy.utils import (
|
from litellm.proxy.utils import (
|
||||||
_duration_in_seconds,
|
_duration_in_seconds,
|
||||||
_extract_from_regex,
|
_extract_from_regex,
|
||||||
|
@ -830,6 +831,29 @@ def test_is_base64_encoded():
|
||||||
assert is_base64_encoded(s=base64_image) is True
|
assert is_base64_encoded(s=base64_image) is True
|
||||||
|
|
||||||
|
|
||||||
|
@mock.patch("httpx.AsyncClient")
|
||||||
|
@mock.patch.dict(os.environ, {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"}, clear=True)
|
||||||
|
def test_async_http_handler(mock_async_client):
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
timeout = 120
|
||||||
|
event_hooks = {"request": [lambda r: r]}
|
||||||
|
concurrent_limit = 2
|
||||||
|
|
||||||
|
AsyncHTTPHandler(timeout, event_hooks, concurrent_limit)
|
||||||
|
|
||||||
|
mock_async_client.assert_called_with(
|
||||||
|
cert="/client.pem",
|
||||||
|
event_hooks=event_hooks,
|
||||||
|
headers=headers,
|
||||||
|
limits=httpx.Limits(
|
||||||
|
max_connections=concurrent_limit,
|
||||||
|
max_keepalive_connections=concurrent_limit,
|
||||||
|
),
|
||||||
|
timeout=timeout,
|
||||||
|
verify="/certificate.pem",
|
||||||
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
|
"model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
|
||||||
)
|
)
|
||||||
|
@ -842,3 +866,4 @@ def test_supports_audio_input(model, expected_bool):
|
||||||
supports_pc = supports_audio_input(model=model)
|
supports_pc = supports_audio_input(model=model)
|
||||||
|
|
||||||
assert supports_pc == expected_bool
|
assert supports_pc == expected_bool
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue