(feat) - allow building litellm proxy from pip package (#7633)

* fix working build from pip

* add tests for proxy_build_from_pip_tests

* doc clean up for deployment

* docs cleanup

* docs build from pip

* fix cd docker/build_from_pip
Ishaan Jaff 2025-01-08 16:36:57 -08:00 committed by GitHub
parent 43566e9842
commit fd0a03f719
7 changed files with 248 additions and 49 deletions


@@ -1366,7 +1366,103 @@ jobs:
      # Store test results
      - store_test_results:
          path: test-results
  proxy_build_from_pip_tests:
    machine:
      image: ubuntu-2204:2023.10.1
    resource_class: xlarge
    working_directory: ~/project
    steps:
      - checkout
      - run:
          name: Install Docker CLI (In case it's not already installed)
          command: |
            sudo apt-get update
            sudo apt-get install -y docker-ce docker-ce-cli containerd.io
      - run:
          name: Install Python 3.9
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            export PATH="$HOME/miniconda/bin:$PATH"
            conda init bash
            source ~/.bashrc
            conda create -n myenv python=3.9 -y
            conda activate myenv
            python --version
      - run:
          name: Install Dependencies
          command: |
            pip install "pytest==7.3.1"
            pip install "pytest-asyncio==0.21.1"
            pip install aiohttp
            python -m pip install --upgrade pip
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
            pip install mypy
      - run:
          name: Build Docker image
          command: |
            cd docker/build_from_pip
            docker build -t my-app:latest -f Dockerfile.build_from_pip .
      - run:
          name: Run Docker container
          # intentionally give bad redis credentials here
          # the OTEL test - should get this as a trace
          command: |
            cd docker/build_from_pip
            docker run -d \
              -p 4000:4000 \
              -e DATABASE_URL=$PROXY_DATABASE_URL \
              -e REDIS_HOST=$REDIS_HOST \
              -e REDIS_PASSWORD=$REDIS_PASSWORD \
              -e REDIS_PORT=$REDIS_PORT \
              -e LITELLM_MASTER_KEY="sk-1234" \
              -e OPENAI_API_KEY=$OPENAI_API_KEY \
              -e LITELLM_LICENSE=$LITELLM_LICENSE \
              -e OTEL_EXPORTER="in_memory" \
              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
              -e AWS_REGION_NAME=$AWS_REGION_NAME \
              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
              -e COHERE_API_KEY=$COHERE_API_KEY \
              -e GCS_FLUSH_INTERVAL="1" \
              --name my-app \
              -v $(pwd)/litellm_config.yaml:/app/config.yaml \
              my-app:latest \
              --config /app/config.yaml \
              --port 4000 \
              --detailed_debug
      - run:
          name: Install curl and dockerize
          command: |
            sudo apt-get update
            sudo apt-get install -y curl
            sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
            sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
      - run:
          name: Start outputting logs
          command: docker logs -f my-app
          background: true
      - run:
          name: Wait for app to be ready
          command: dockerize -wait http://localhost:4000 -timeout 5m
      - run:
          name: Run tests
          command: |
            python -m pytest -vv tests/basic_proxy_startup_tests -x --junitxml=test-results/junit-2.xml --durations=5
          no_output_timeout: 120m
      # Clean up first container
      - run:
          name: Stop and remove first container
          command: |
            docker stop my-app
            docker rm my-app
  proxy_pass_through_endpoint_tests:
    machine:
      image: ubuntu-2204:2023.10.1
@@ -1792,6 +1888,12 @@ workflows:
              only:
                - main
                - /litellm_.*/
      - proxy_build_from_pip_tests:
          filters:
            branches:
              only:
                - main
                - /litellm_.*/
      - proxy_pass_through_endpoint_tests:
          filters:
            branches:
@@ -1903,6 +2005,7 @@ workflows:
            - installing_litellm_on_python
            - installing_litellm_on_python_3_13
            - proxy_logging_guardrails_model_info_tests
            - proxy_build_from_pip_tests
            - proxy_pass_through_endpoint_tests
            - check_code_and_doc_quality
          filters:
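
For reference, the wait-and-test flow this job automates can be reproduced against a locally running container. A minimal sketch, assuming the image was built and started as in the job above (master key `sk-1234`, port 4000); the health endpoint path is an assumption, adjust to your deployment:

```shell
# Same readiness gate the CI job uses via dockerize
dockerize -wait http://localhost:4000 -timeout 5m

# Optional sanity check against the proxy's health endpoint
curl -s http://localhost:4000/health/liveliness -H "Authorization: Bearer sk-1234"

# Run the same startup test suite the job runs
python -m pytest -vv tests/basic_proxy_startup_tests -x --durations=5
```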


@@ -0,0 +1,23 @@
FROM cgr.dev/chainguard/python:latest-dev
USER root
WORKDIR /app
ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"
# Install runtime dependencies
RUN apk update && \
apk add --no-cache gcc python3-dev openssl openssl-dev
RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip
COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
${HOME}/venv/bin/pip install -r requirements.txt
EXPOSE 4000/tcp
ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]


@@ -0,0 +1,9 @@
# Docker to build LiteLLM Proxy from litellm pip package

### When to use this?
If you need to build the LiteLLM Proxy from the litellm pip package, you can use this Dockerfile as a reference.

### Why build from the pip package?
- If your company has strict requirements around security or building images, you can follow the steps outlined here.
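
As a rough sketch of those steps (the image tag and config filename here are illustrative, mirroring the CI job above):

```shell
cd docker/build_from_pip

# Build the image from the pinned pip package in requirements.txt
docker build -t litellm-from-pip -f Dockerfile.build_from_pip .

# Run it with a proxy config mounted into the container
docker run -d -p 4000:4000 \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  litellm-from-pip \
  --config /app/config.yaml --port 4000
```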


@@ -0,0 +1,9 @@
model_list:
  - model_name: "gpt-4"
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  alerting: ["slack"]
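
With this config loaded, a request to the proxy targets the `gpt-4` model name. A hedged example, assuming the proxy is reachable on port 4000 with the master key used by the CI job (`sk-1234`):

```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "Hello, what model is this?"}]
  }'
```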


@@ -0,0 +1,4 @@
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
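
Outside of Docker, the same requirements file can stand up the proxy in a local virtual environment. A minimal sketch, assuming a `config.yaml` in the working directory:

```shell
python -m venv venv
source venv/bin/activate
pip install -r requirements.txt

# litellm[proxy] provides the `litellm` CLI entrypoint used by the Dockerfile
litellm --config config.yaml --port 4000
```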


@@ -32,11 +32,10 @@ source .env
docker-compose up
```
### Docker Run
#### Step 1. CREATE config.yaml
Example `litellm_config.yaml`
@@ -52,7 +51,7 @@ model_list:
#### Step 2. RUN Docker Image
```shell
docker run \
@@ -66,7 +65,7 @@ docker run \
Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)
#### Step 3. TEST Request
Pass `model=azure-gpt-3.5` this was set on step 1
@@ -84,13 +83,7 @@ Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/l
}'
```
### Docker Run - CLI Args
See all supported CLI args [here](https://docs.litellm.ai/docs/proxy/cli):
@@ -104,15 +97,8 @@ Here's how you can run the docker image and start litellm on port 8002 with `num
docker run ghcr.io/berriai/litellm:main-latest --port 8002 --num_workers 8
```
### Use litellm as a base image
```shell
# Use the provided base image
@@ -137,9 +123,75 @@ EXPOSE 4000/tcp
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
```
### Build from litellm `pip` package
Follow these instructions to build a Docker container from the litellm pip package. If your company has strict requirements around security or building images, you can follow these steps.
Dockerfile
```shell
FROM cgr.dev/chainguard/python:latest-dev
USER root
WORKDIR /app
ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"
# Install runtime dependencies
RUN apk update && \
apk add --no-cache gcc python3-dev openssl openssl-dev
RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip
COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
${HOME}/venv/bin/pip install -r requirements.txt
EXPOSE 4000/tcp
ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]
```
Example `requirements.txt`
```shell
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
```
Build the docker image
```shell
docker build \
-f Dockerfile.build_from_pip \
-t litellm-proxy-with-pip-5 .
```
Run the docker image
```shell
docker run \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e OPENAI_API_KEY="sk-1222" \
-e DATABASE_URL="postgresql://xxxxxxxxx" \
-p 4000:4000 \
litellm-proxy-with-pip-5 \
--config /app/config.yaml --detailed_debug
```
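
To confirm the pip-built container is serving, a quick smoke test (endpoint paths are the usual LiteLLM proxy ones; add an `Authorization: Bearer <your master key>` header if you configured one):

```shell
# Liveness check
curl -s http://localhost:4000/health/liveliness

# List the models loaded from config.yaml
curl -s http://localhost:4000/v1/models
```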
### Terraform
s/o [Nicholas Cecere](https://www.linkedin.com/in/nicholas-cecere-24243549/) for his LiteLLM User Management Terraform
👉 [Go here for Terraform](https://github.com/ncecere/terraform-litellm-user-mgmt)
### Kubernetes
Deploying a config file based litellm instance just requires a simple deployment that loads
the config.yaml file via a config map. Also it would be a good practice to use the env var
@@ -204,11 +256,8 @@ spec:
To avoid issues with predictability, difficulties in rollback, and inconsistent environments, use versioning or SHA digests (for example, `litellm:main-v1.30.3` or `litellm@sha256:12345abcdef...`) instead of `litellm:main-latest`.
:::
### Helm Chart
:::info
@@ -248,13 +297,9 @@ kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.
**That's it ! That's the quick start to deploy litellm**
#### Make LLM API Requests
:::info
💡 Go here 👉 [to make your first LLM API Request](user_keys)
@@ -263,7 +308,7 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M
:::
## Deployment Options
| Docs | When to Use |
| ------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -272,8 +317,8 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M
| [LiteLLM container + Redis](#litellm-container--redis) | + load balance across multiple litellm containers |
| [LiteLLM Database container + PostgresDB + Redis](#litellm-database-container--postgresdb--redis) | + use Virtual Keys + Track Spend + load balance across multiple litellm containers |
### Deploy with Database
##### Docker, Kubernetes, Helm Chart
Requirements:
- Need a postgres database (e.g. [Supabase](https://supabase.com/), [Neon](https://neon.tech/), etc) Set `DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>` in your env
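
For illustration only, a hedged run of the database-enabled image with that env var set (values are placeholders; the image name and `--config` flag appear later in this doc):

```shell
docker run --name litellm-proxy \
  -e DATABASE_URL="postgresql://<user>:<password>@<host>:<port>/<dbname>" \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```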
@@ -491,7 +536,7 @@ Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.
</TabItem>
</Tabs>
### Deploy with Redis
Use Redis when you need litellm to load balance across multiple litellm containers
The only change required is setting Redis on your `config.yaml`
@@ -523,7 +568,7 @@ Start docker container with config
docker run ghcr.io/berriai/litellm:main-latest --config your_config.yaml
```
### Deploy with Database + Redis
The only change required is setting Redis on your `config.yaml`
LiteLLM Proxy supports sharing rpm/tpm shared across multiple litellm instances, pass `redis_host`, `redis_password` and `redis_port` to enable this. (LiteLLM will use Redis to track rpm/tpm usage )
@@ -558,7 +603,7 @@ docker run --name litellm-proxy \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```
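
As a sketch, the Redis settings referenced above typically live under `router_settings` in `config.yaml` (key names follow the usual LiteLLM config convention, but treat them as an assumption and check the router docs):

```shell
cat >> your_config.yaml <<'EOF'
router_settings:
  redis_host: <your-redis-host>
  redis_password: <your-redis-password>
  redis_port: <your-redis-port>
EOF
```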
### (Non Root) - without Internet Connection
By default `prisma generate` downloads [prisma's engine binaries](https://www.prisma.io/docs/orm/reference/environment-variables-reference#custom-engine-file-locations). This might cause errors when running without internet connection.
@@ -572,7 +617,7 @@ docker pull ghcr.io/berriai/litellm-non_root:main-stable
## Advanced Deployment Settings
### 1. Custom server root path (Proxy base url)
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
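
This is typically done by setting the `SERVER_ROOT_PATH` environment variable before starting the proxy; a hedged sketch (env var name per LiteLLM docs, config filename is a placeholder):

```shell
docker run \
  -e SERVER_ROOT_PATH="/api/v1" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest --config your_config.yaml
```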
@@ -670,7 +715,7 @@ After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (sinc
**That's it**, that's all you need to run the proxy on a custom root path
### 2. SSL Certification
Use this, If you need to set ssl certificates for your on prem litellm proxy
@@ -684,7 +729,7 @@ docker run ghcr.io/berriai/litellm:main-latest \
Provide an ssl certificate when starting litellm proxy server
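
A hedged sketch using the proxy's SSL CLI flags (flag names per the LiteLLM CLI; certificate paths are placeholders):

```shell
docker run ghcr.io/berriai/litellm:main-latest \
  --config your_config.yaml \
  --ssl_keyfile_path /path/to/ssl_keyfile.pem \
  --ssl_certfile_path /path/to/ssl_certfile.pem
```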
### 3. Http/2 with Hypercorn
Use this if you want to run the proxy with hypercorn to support http/2
@@ -731,7 +776,7 @@ docker run \
--run_hypercorn
```
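
For context, `--run_hypercorn` is passed as an extra CLI arg when starting the container, alongside the usual config mount; a minimal sketch with illustrative paths:

```shell
docker run \
  -v $(pwd)/proxy_config.yaml:/app/config.yaml \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest \
  --config /app/config.yaml \
  --run_hypercorn
```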
### 4. config.yaml file on s3, GCS Bucket Object/url
Use this if you cannot mount a config file on your deployment service (example - AWS Fargate, Railway etc)
@@ -787,7 +832,7 @@ docker run --name litellm-proxy \
<Tabs>
<TabItem value="AWS EKS" label="AWS EKS - Kubernetes">
### Kubernetes (AWS EKS)
Step1. Create an EKS Cluster with the following spec
@@ -880,7 +925,7 @@ Once the container is running, you can access the application by going to `http:
</TabItem>
<TabItem value="google-cloud-run" label="Google Cloud Run">
### Google Cloud Run
1. Fork this repo - [github.com/BerriAI/example_litellm_gcp_cloud_run](https://github.com/BerriAI/example_litellm_gcp_cloud_run)
@@ -907,7 +952,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="render" label="Render deploy">
### Render
https://render.com/
<iframe width="840" height="500" src="https://www.loom.com/embed/805964b3c8384b41be180a61442389a3" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
@@ -916,7 +963,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="railway" label="Railway">
### Railway
https://railway.app
**Step 1: Click the button** to deploy to Railway
@@ -930,7 +979,7 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
## Extras
### Docker compose
**Step 1**


@@ -1,7 +1,9 @@
model_list:
  - model_name: "gpt-4"
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  alerting: ["slack"]