(feat) - allow building litellm proxy from pip package (#7633)

* fix working build from pip

* add tests for proxy_build_from_pip_tests

* doc clean up for deployment

* docs cleanup

* docs build from pip

* fix cd docker/build_from_pip
Ishaan Jaff, 2025-01-08 16:36:57 -08:00, committed by GitHub
parent 43566e9842
commit fd0a03f719
7 changed files with 248 additions and 49 deletions

.circleci/config.yml

@@ -1366,7 +1366,103 @@ jobs:
      # Store test results
      - store_test_results:
          path: test-results

  proxy_build_from_pip_tests:
    machine:
      image: ubuntu-2204:2023.10.1
    resource_class: xlarge
    working_directory: ~/project
    steps:
      - checkout
      - run:
          name: Install Docker CLI (in case it's not already installed)
          command: |
            sudo apt-get update
            sudo apt-get install -y docker-ce docker-ce-cli containerd.io
      - run:
          name: Install Python 3.9
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            export PATH="$HOME/miniconda/bin:$PATH"
            conda init bash
            source ~/.bashrc
            conda create -n myenv python=3.9 -y
            conda activate myenv
            python --version
      - run:
          name: Install Dependencies
          command: |
            python -m pip install --upgrade pip
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
            pip install aiohttp
            pip install mypy
      - run:
          name: Build Docker image
          command: |
            cd docker/build_from_pip
            docker build -t my-app:latest -f Dockerfile.build_from_pip .
      - run:
          name: Run Docker container
          # intentionally pass bad Redis credentials here;
          # the OTEL test should pick this up as a trace
          command: |
            cd docker/build_from_pip
            docker run -d \
              -p 4000:4000 \
              -e DATABASE_URL=$PROXY_DATABASE_URL \
              -e REDIS_HOST=$REDIS_HOST \
              -e REDIS_PASSWORD=$REDIS_PASSWORD \
              -e REDIS_PORT=$REDIS_PORT \
              -e LITELLM_MASTER_KEY="sk-1234" \
              -e OPENAI_API_KEY=$OPENAI_API_KEY \
              -e LITELLM_LICENSE=$LITELLM_LICENSE \
              -e OTEL_EXPORTER="in_memory" \
              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
              -e AWS_REGION_NAME=$AWS_REGION_NAME \
              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
              -e COHERE_API_KEY=$COHERE_API_KEY \
              -e GCS_FLUSH_INTERVAL="1" \
              --name my-app \
              -v $(pwd)/litellm_config.yaml:/app/config.yaml \
              my-app:latest \
              --config /app/config.yaml \
              --port 4000 \
              --detailed_debug
      - run:
          name: Install curl and dockerize
          command: |
            sudo apt-get update
            sudo apt-get install -y curl
            sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
            sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
      - run:
          name: Start outputting logs
          command: docker logs -f my-app
          background: true
      - run:
          name: Wait for app to be ready
          command: dockerize -wait http://localhost:4000 -timeout 5m
      - run:
          name: Run tests
          command: |
            python -m pytest -vv tests/basic_proxy_startup_tests -x --junitxml=test-results/junit-2.xml --durations=5
          no_output_timeout: 120m
      # Clean up first container
      - run:
          name: Stop and remove first container
          command: |
            docker stop my-app
            docker rm my-app

  proxy_pass_through_endpoint_tests:
    machine:
      image: ubuntu-2204:2023.10.1
@@ -1792,6 +1888,12 @@ workflows:
            only:
              - main
              - /litellm_.*/
      - proxy_build_from_pip_tests:
          filters:
            branches:
              only:
                - main
                - /litellm_.*/
      - proxy_pass_through_endpoint_tests:
          filters:
            branches:
@@ -1903,6 +2005,7 @@ workflows:
            - installing_litellm_on_python
            - installing_litellm_on_python_3_13
            - proxy_logging_guardrails_model_info_tests
            - proxy_build_from_pip_tests
            - proxy_pass_through_endpoint_tests
            - check_code_and_doc_quality
          filters:
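To debug the `proxy_build_from_pip_tests` job without pushing to CI, the same build-and-wait sequence can be reproduced locally. A minimal sketch, assuming you run it from the repo's `docker/build_from_pip` directory; the `sk-dummy` key is a placeholder, not a real secret (the CI job injects real credentials from its environment):

```shell
cd docker/build_from_pip
docker build -t my-app:latest -f Dockerfile.build_from_pip .

# placeholder credentials for a local smoke test
docker run -d -p 4000:4000 \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -e OPENAI_API_KEY="sk-dummy" \
  --name my-app \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  my-app:latest --config /app/config.yaml --port 4000 --detailed_debug

# wait for startup the same way the CI job does, then tail logs
dockerize -wait http://localhost:4000 -timeout 5m
docker logs -f my-app
```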

docker/build_from_pip/Dockerfile.build_from_pip

@@ -0,0 +1,23 @@
FROM cgr.dev/chainguard/python:latest-dev

USER root
WORKDIR /app

ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"

# Install runtime dependencies
RUN apk update && \
    apk add --no-cache gcc python3-dev openssl openssl-dev

RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip

COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
    ${HOME}/venv/bin/pip install -r requirements.txt

EXPOSE 4000/tcp

ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]

docker/build_from_pip/README.md

@@ -0,0 +1,9 @@
# Docker to build LiteLLM Proxy from the litellm pip package

### When to use this?

If you need to build LiteLLM Proxy from the litellm pip package, you can use this Dockerfile as a reference.

### Why build from the pip package?

- If your company has strict requirements around security / building images, you can follow the steps outlined here.

docker/build_from_pip/litellm_config.yaml

@@ -0,0 +1,9 @@
model_list:
  - model_name: "gpt-4"
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  alerting: ["slack"]
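Once a container starts with this config, the `gpt-4` entry can be exercised with a standard chat completion request. A sketch, assuming the proxy listens on port 4000 and `LITELLM_MASTER_KEY=sk-1234` as in the CI job above:

```shell
curl http://localhost:4000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "ping"}]
  }'
```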

docker/build_from_pip/requirements.txt

@@ -0,0 +1,4 @@
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
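To confirm the pinned version actually landed in the image, one option is to query the CLI directly. A sketch, assuming the `my-app:latest` tag from the CI job above and that the litellm CLI exposes a `--version` flag:

```shell
# ENTRYPOINT is litellm, so extra args go straight to the CLI
docker run --rm my-app:latest --version
```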

docs/my-website/docs/proxy/deploy.md

@@ -32,11 +32,10 @@ source .env
docker-compose up
```
-<Tabs>
-<TabItem value="basic" label="Basic (No DB)">
+### Docker Run
-### Step 1. CREATE config.yaml
+#### Step 1. CREATE config.yaml
Example `litellm_config.yaml`
@@ -52,7 +51,7 @@ model_list:
-### Step 2. RUN Docker Image
+#### Step 2. RUN Docker Image
```shell
docker run \
@@ -66,7 +65,7 @@ docker run \
Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)
-### Step 3. TEST Request
+#### Step 3. TEST Request
Pass `model=azure-gpt-3.5`; this was set in step 1
@@ -84,13 +83,7 @@ Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)
}'
```
-</TabItem>
-<TabItem value="cli" label="With CLI Args">
-#### Run with LiteLLM CLI args
+### Docker Run - CLI Args
See all supported CLI args [here](https://docs.litellm.ai/docs/proxy/cli):
@@ -104,15 +97,8 @@ Here's how you can run the docker image and start litellm on port 8002 with `num_workers 8`
docker run ghcr.io/berriai/litellm:main-latest --port 8002 --num_workers 8
```
-</TabItem>
-<TabItem value="terraform" label="Terraform">
-s/o [Nicholas Cecere](https://www.linkedin.com/in/nicholas-cecere-24243549/) for his LiteLLM User Management Terraform
-👉 [Go here for Terraform](https://github.com/ncecere/terraform-litellm-user-mgmt)
-</TabItem>
-<TabItem value="base-image" label="use litellm as a base image">
+### Use litellm as a base image
```shell
# Use the provided base image
@@ -137,9 +123,75 @@ EXPOSE 4000/tcp
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
```
-</TabItem>
+### Build from litellm `pip` package
-<TabItem value="kubernetes" label="Kubernetes">
Follow these instructions to build a Docker container from the litellm pip package. If your company has a strict requirement around security / building images, you can follow these steps.
Dockerfile
```shell
FROM cgr.dev/chainguard/python:latest-dev

USER root
WORKDIR /app

ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"

# Install runtime dependencies
RUN apk update && \
    apk add --no-cache gcc python3-dev openssl openssl-dev

RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip

COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
    ${HOME}/venv/bin/pip install -r requirements.txt

EXPOSE 4000/tcp

ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]
```
Example `requirements.txt`
```shell
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
```
Build the docker image
```shell
docker build \
-f Dockerfile.build_from_pip \
-t litellm-proxy-with-pip-5 .
```
Run the docker image
```shell
docker run \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e OPENAI_API_KEY="sk-1222" \
-e DATABASE_URL="postgresql://xxxxxxxxx" \
-p 4000:4000 \
litellm-proxy-with-pip-5 \
--config /app/config.yaml --detailed_debug
```
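After the container starts, a quick way to confirm the proxy is up (and, when `DATABASE_URL` is set, that dependencies are reachable) is litellm's health endpoints. A sketch, assuming the default port 4000:

```shell
# liveness: the process is up
curl http://localhost:4000/health/liveliness

# readiness: dependencies (e.g. the database) are connected
curl http://localhost:4000/health/readiness
```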
### Terraform
s/o [Nicholas Cecere](https://www.linkedin.com/in/nicholas-cecere-24243549/) for his LiteLLM User Management Terraform
👉 [Go here for Terraform](https://github.com/ncecere/terraform-litellm-user-mgmt)
### Kubernetes
Deploying a config-file-based litellm instance just requires a simple deployment that loads the config.yaml file via a ConfigMap. It is also good practice to use the env var
@@ -204,11 +256,8 @@ spec:
To avoid issues with predictability, difficulties in rollback, and inconsistent environments, use versioning or SHA digests (for example, `litellm:main-v1.30.3` or `litellm@sha256:12345abcdef...`) instead of `litellm:main-latest`.
:::
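A minimal sketch of the ConfigMap approach described above; the resource names are illustrative, not from this commit:

```shell
# load the proxy config into a ConfigMap (mounted by the deployment as config.yaml)
kubectl create configmap litellm-config --from-file=config.yaml

# pin the image to a tagged release instead of main-latest, per the note above
kubectl set image deployment/litellm litellm=ghcr.io/berriai/litellm:main-v1.30.3
```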
-</TabItem>
-<TabItem value="helm-" label="Helm Chart">
+### Helm Chart
:::info
@@ -248,13 +297,9 @@ kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.
-</TabItem>
-</Tabs>
**That's it! That's the quick start to deploy litellm.**
## Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl
-#### Make LLM API Requests
:::info
💡 Go here 👉 [to make your first LLM API Request](user_keys)
@@ -263,7 +308,7 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M
:::
-## Options to deploy LiteLLM
+## Deployment Options
| Docs | When to Use |
| ------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -272,8 +317,8 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M
| [LiteLLM container + Redis](#litellm-container--redis) | + load balance across multiple litellm containers |
| [LiteLLM Database container + PostgresDB + Redis](#litellm-database-container--postgresdb--redis) | + use Virtual Keys + Track Spend + load balance across multiple litellm containers |
-## Deploy with Database
-### Docker, Kubernetes, Helm Chart
+### Deploy with Database
+##### Docker, Kubernetes, Helm Chart
Requirements:
- Need a postgres database (e.g. [Supabase](https://supabase.com/), [Neon](https://neon.tech/)). Set `DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>` in your env
@@ -491,7 +536,7 @@ Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.
</TabItem>
</Tabs>
-## LiteLLM container + Redis
+### Deploy with Redis
Use Redis when you need litellm to load balance across multiple litellm containers.
The only change required is setting Redis on your `config.yaml`
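A sketch of what that change can look like, assuming `os.environ/` references in `config.yaml` (a litellm config convention for reading env vars) and Redis credentials passed through at runtime, mirroring the CI job above:

```shell
# append router settings that read Redis credentials from the environment
cat >> config.yaml <<'EOF'
router_settings:
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
EOF

# -e VAR with no value forwards the variable from the host environment
docker run -e REDIS_HOST -e REDIS_PASSWORD -e REDIS_PORT \
  -v $(pwd)/config.yaml:/app/config.yaml \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest --config /app/config.yaml
```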
@@ -523,7 +568,7 @@ Start docker container with config
docker run ghcr.io/berriai/litellm:main-latest --config your_config.yaml
```
-## LiteLLM Database container + PostgresDB + Redis
+### Deploy with Database + Redis
The only change required is setting Redis on your `config.yaml`
LiteLLM Proxy supports sharing rpm/tpm limits across multiple litellm instances; pass `redis_host`, `redis_password` and `redis_port` to enable this. (LiteLLM will use Redis to track rpm/tpm usage.)
@@ -558,7 +603,7 @@ docker run --name litellm-proxy \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```
-## LiteLLM without Internet Connection
+### (Non Root) - without Internet Connection
By default `prisma generate` downloads [prisma's engine binaries](https://www.prisma.io/docs/orm/reference/environment-variables-reference#custom-engine-file-locations). This might cause errors when running without an internet connection.
@@ -572,7 +617,7 @@ docker pull ghcr.io/berriai/litellm-non_root:main-stable
## Advanced Deployment Settings
-### 1. Customization of the server root path (custom Proxy base url)
+### 1. Custom server root path (Proxy base url)
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
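A minimal sketch, assuming litellm reads the base path from a `SERVER_ROOT_PATH` env var (treat the exact variable name as an assumption; this section documents the full mechanism):

```shell
docker run \
  -e SERVER_ROOT_PATH="/api/v1" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest
```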
@@ -670,7 +715,7 @@ After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (sinc
**That's it**, that's all you need to run the proxy on a custom root path
-### 2. Setting SSL Certification
+### 2. SSL Certification
Use this if you need to set SSL certificates for your on-prem litellm proxy
@@ -684,7 +729,7 @@ docker run ghcr.io/berriai/litellm:main-latest \
Provide an SSL certificate when starting the litellm proxy server
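A sketch, assuming litellm proxy's `--ssl_keyfile_path` / `--ssl_certfile_path` CLI args and certs mounted in from the host:

```shell
docker run -p 4000:4000 \
  -v $(pwd)/certs:/certs \
  ghcr.io/berriai/litellm:main-latest \
  --ssl_keyfile_path /certs/key.pem \
  --ssl_certfile_path /certs/cert.pem
```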
-### 3. Using Http/2 with Hypercorn
+### 3. Http/2 with Hypercorn
Use this if you want to run the proxy with hypercorn to support http/2
@@ -731,7 +776,7 @@ docker run \
--run_hypercorn
```
-### 4. Providing LiteLLM config.yaml file as a s3, GCS Bucket Object/url
+### 4. config.yaml file on s3, GCS Bucket Object/url
Use this if you cannot mount a config file on your deployment service (e.g. AWS Fargate, Railway).
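A sketch of the idea, assuming env vars `LITELLM_CONFIG_BUCKET_NAME` and `LITELLM_CONFIG_BUCKET_OBJECT_KEY` point the proxy at the bucket object (treat the exact variable names as an assumption; the elided example below shows the full set):

```shell
docker run --name litellm-proxy \
  -e LITELLM_CONFIG_BUCKET_NAME="my-config-bucket" \
  -e LITELLM_CONFIG_BUCKET_OBJECT_KEY="prod/config.yaml" \
  -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
  -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest
```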
@@ -787,7 +832,7 @@ docker run --name litellm-proxy \
<Tabs>
<TabItem value="AWS EKS" label="AWS EKS - Kubernetes">
-### Kubernetes - Deploy on EKS
+### Kubernetes (AWS EKS)
Step 1. Create an EKS Cluster with the following spec
@@ -880,7 +925,7 @@ Once the container is running, you can access the application by going to `http:
</TabItem>
<TabItem value="google-cloud-run" label="Google Cloud Run">
-### Deploy on Google Cloud Run
+### Google Cloud Run
1. Fork this repo - [github.com/BerriAI/example_litellm_gcp_cloud_run](https://github.com/BerriAI/example_litellm_gcp_cloud_run)
@@ -907,7 +952,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="render" label="Render deploy">
-### Deploy on Render https://render.com/
+### Render
+https://render.com/
<iframe width="840" height="500" src="https://www.loom.com/embed/805964b3c8384b41be180a61442389a3" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
@@ -916,7 +963,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="railway" label="Railway">
-### Deploy on Railway https://railway.app
+### Railway
+https://railway.app
**Step 1: Click the button** to deploy to Railway
@@ -930,7 +979,7 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
## Extras
-### Run with docker compose
+### Docker compose
**Step 1**

litellm_config.yaml

@@ -1,7 +1,9 @@
 model_list:
-  - model_name: "fake-openai-endpoint"
+  - model_name: "gpt-4"
     litellm_params:
-      model: openai/gpt-4o
+      model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+general_settings:
+  alerting: ["slack"]