Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 02:34:29 +00:00
(feat) - allow building litellm proxy from pip package (#7633)

* fix working build from pip
* add tests for proxy_build_from_pip_tests
* doc clean up for deployment
* docs cleanup
* docs build from pip
* fix cd docker/build_from_pip
Parent: 43566e9842
Commit: fd0a03f719

7 changed files with 248 additions and 49 deletions
@@ -1366,7 +1366,103 @@ jobs:
      # Store test results
      - store_test_results:
          path: test-results

  proxy_build_from_pip_tests:
    machine:
      image: ubuntu-2204:2023.10.1
    resource_class: xlarge
    working_directory: ~/project
    steps:
      - checkout
      - run:
          name: Install Docker CLI (In case it's not already installed)
          command: |
            sudo apt-get update
            sudo apt-get install -y docker-ce docker-ce-cli containerd.io
      - run:
          name: Install Python 3.9
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            export PATH="$HOME/miniconda/bin:$PATH"
            conda init bash
            source ~/.bashrc
            conda create -n myenv python=3.9 -y
            conda activate myenv
            python --version
      - run:
          name: Install Dependencies
          command: |
            pip install "pytest==7.3.1"
            pip install "pytest-asyncio==0.21.1"
            pip install aiohttp
            python -m pip install --upgrade pip
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
            pip install mypy
      - run:
          name: Build Docker image
          command: |
            cd docker/build_from_pip
            docker build -t my-app:latest -f Dockerfile.build_from_pip .
      - run:
          name: Run Docker container
          # intentionally give bad redis credentials here
          # the OTEL test - should get this as a trace
          command: |
            cd docker/build_from_pip
            docker run -d \
              -p 4000:4000 \
              -e DATABASE_URL=$PROXY_DATABASE_URL \
              -e REDIS_HOST=$REDIS_HOST \
              -e REDIS_PASSWORD=$REDIS_PASSWORD \
              -e REDIS_PORT=$REDIS_PORT \
              -e LITELLM_MASTER_KEY="sk-1234" \
              -e OPENAI_API_KEY=$OPENAI_API_KEY \
              -e LITELLM_LICENSE=$LITELLM_LICENSE \
              -e OTEL_EXPORTER="in_memory" \
              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
              -e AWS_REGION_NAME=$AWS_REGION_NAME \
              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
              -e COHERE_API_KEY=$COHERE_API_KEY \
              -e GCS_FLUSH_INTERVAL="1" \
              --name my-app \
              -v $(pwd)/litellm_config.yaml:/app/config.yaml \
              my-app:latest \
              --config /app/config.yaml \
              --port 4000 \
              --detailed_debug
      - run:
          name: Install curl and dockerize
          command: |
            sudo apt-get update
            sudo apt-get install -y curl
            sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
            sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
      - run:
          name: Start outputting logs
          command: docker logs -f my-app
          background: true
      - run:
          name: Wait for app to be ready
          command: dockerize -wait http://localhost:4000 -timeout 5m
      - run:
          name: Run tests
          command: |
            python -m pytest -vv tests/basic_proxy_startup_tests -x --junitxml=test-results/junit-2.xml --durations=5
          no_output_timeout: 120m
      # Clean up first container
      - run:
          name: Stop and remove first container
          command: |
            docker stop my-app
            docker rm my-app

  proxy_pass_through_endpoint_tests:
    machine:
      image: ubuntu-2204:2023.10.1

@@ -1792,6 +1888,12 @@ workflows:
              only:
                - main
                - /litellm_.*/
      - proxy_build_from_pip_tests:
          filters:
            branches:
              only:
                - main
                - /litellm_.*/
      - proxy_pass_through_endpoint_tests:
          filters:
            branches:
              only:

@@ -1903,6 +2005,7 @@ workflows:
            - installing_litellm_on_python
            - installing_litellm_on_python_3_13
            - proxy_logging_guardrails_model_info_tests
            - proxy_build_from_pip_tests
            - proxy_pass_through_endpoint_tests
            - check_code_and_doc_quality
          filters:
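For reference, a rough local equivalent of the build-and-run steps in this job; it assumes Docker is installed, starts from the repo root, and passes only the minimum environment needed for the bundled `litellm_config.yaml` (the CI run wires up many more provider keys):

```shell
cd docker/build_from_pip
docker build -t my-app:latest -f Dockerfile.build_from_pip .

# start the proxy with the config shipped in this folder
docker run -d --name my-app \
  -p 4000:4000 \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  my-app:latest --config /app/config.yaml --port 4000 --detailed_debug

# wait until the proxy answers on port 4000 (the CI job uses dockerize for this)
timeout 300 bash -c 'until curl -s http://localhost:4000 > /dev/null; do sleep 2; done'

# run the same startup tests the CI job runs
cd ../.. && python -m pytest -vv tests/basic_proxy_startup_tests -x
```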
docker/build_from_pip/Dockerfile.build_from_pip (new file, 23 lines)

@@ -0,0 +1,23 @@
FROM cgr.dev/chainguard/python:latest-dev

USER root
WORKDIR /app

ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"

# Install runtime dependencies
RUN apk update && \
    apk add --no-cache gcc python3-dev openssl openssl-dev

RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip

COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
    ${HOME}/venv/bin/pip install -r requirements.txt

EXPOSE 4000/tcp

ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]
docker/build_from_pip/Readme.md (new file, 9 lines)

@@ -0,0 +1,9 @@
# Docker to build LiteLLM Proxy from litellm pip package

### When to use this?

If you need to build LiteLLM Proxy from the litellm pip package, you can use this Dockerfile as a reference.

### Why build from pip package?

- If your company has a strict requirement around security / building images, you can follow the steps outlined here
docker/build_from_pip/litellm_config.yaml (new file, 9 lines)

@@ -0,0 +1,9 @@
model_list:
  - model_name: "gpt-4"
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  alerting: ["slack"]
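With a container serving this config (for example the CI job above, which publishes port 4000 and sets `LITELLM_MASTER_KEY="sk-1234"`), a quick smoke test of the `gpt-4` entry could look like this; the request shape is the standard OpenAI-compatible endpoint the proxy exposes:

```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "hello"}]
  }'
```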
docker/build_from_pip/requirements.txt (new file, 4 lines)

@@ -0,0 +1,4 @@
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
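The same requirements file can also be installed outside Docker; a minimal sketch (assuming Python 3.9+, run from `docker/build_from_pip/`) that mirrors what the Dockerfile above does inside the image:

```shell
python -m venv venv
./venv/bin/pip install --upgrade pip
./venv/bin/pip install -r requirements.txt   # pulls litellm[proxy]==1.57.3 plus the extras listed above
./venv/bin/litellm --config litellm_config.yaml --port 4000
```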
@@ -32,11 +32,10 @@ source .env
docker-compose up
```

-<Tabs>
-<TabItem value="basic" label="Basic (No DB)">
### Docker Run

-### Step 1. CREATE config.yaml
#### Step 1. CREATE config.yaml

Example `litellm_config.yaml`
@@ -52,7 +51,7 @@ model_list:

-### Step 2. RUN Docker Image
#### Step 2. RUN Docker Image

```shell
docker run \
@@ -66,7 +65,7 @@ docker run \

Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)

-### Step 3. TEST Request
#### Step 3. TEST Request

Pass `model=azure-gpt-3.5` this was set on step 1
@@ -84,13 +83,7 @@ Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/l
}'
```

-</TabItem>
-<TabItem value="cli" label="With CLI Args">
-#### Run with LiteLLM CLI args
### Docker Run - CLI Args

See all supported CLI args [here](https://docs.litellm.ai/docs/proxy/cli):
@@ -104,15 +97,8 @@ Here's how you can run the docker image and start litellm on port 8002 with `num
docker run ghcr.io/berriai/litellm:main-latest --port 8002 --num_workers 8
```

-</TabItem>
-<TabItem value="terraform" label="Terraform">
-s/o [Nicholas Cecere](https://www.linkedin.com/in/nicholas-cecere-24243549/) for his LiteLLM User Management Terraform
-👉 [Go here for Terraform](https://github.com/ncecere/terraform-litellm-user-mgmt)
-</TabItem>
-<TabItem value="base-image" label="use litellm as a base image">
### Use litellm as a base image

```shell
# Use the provided base image
@@ -137,9 +123,75 @@ EXPOSE 4000/tcp
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
```

-</TabItem>
-<TabItem value="kubernetes" label="Kubernetes">

### Build from litellm `pip` package

Follow these instructions to build a docker container from the litellm pip package. If your company has a strict requirement around security / building images, you can follow these steps.

Dockerfile

```shell
FROM cgr.dev/chainguard/python:latest-dev

USER root
WORKDIR /app

ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"

# Install runtime dependencies
RUN apk update && \
    apk add --no-cache gcc python3-dev openssl openssl-dev

RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip

COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
    ${HOME}/venv/bin/pip install -r requirements.txt

EXPOSE 4000/tcp

ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]
```

Example `requirements.txt`

```shell
litellm[proxy]==1.57.3 # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
```

Build the docker image

```shell
docker build \
-f Dockerfile.build_from_pip \
-t litellm-proxy-with-pip-5 .
```

Run the docker image

```shell
docker run \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e OPENAI_API_KEY="sk-1222" \
-e DATABASE_URL="postgresql://xxxxxxxxx" \
-p 4000:4000 \
litellm-proxy-with-pip-5 \
--config /app/config.yaml --detailed_debug
```

### Terraform

s/o [Nicholas Cecere](https://www.linkedin.com/in/nicholas-cecere-24243549/) for his LiteLLM User Management Terraform

👉 [Go here for Terraform](https://github.com/ncecere/terraform-litellm-user-mgmt)

### Kubernetes

Deploying a config file based litellm instance just requires a simple deployment that loads
the config.yaml file via a config map. Also it would be a good practice to use the env var
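A minimal sketch of creating that config map from a local `config.yaml` (the resource name `litellm-config` is illustrative, not taken from this commit):

```shell
kubectl create configmap litellm-config --from-file=config.yaml
```

The deployment can then mount this config map into the pod and point the container at the mounted path with `--config`.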
@@ -204,11 +256,8 @@ spec:
To avoid issues with predictability, difficulties in rollback, and inconsistent environments, use versioning or SHA digests (for example, `litellm:main-v1.30.3` or `litellm@sha256:12345abcdef...`) instead of `litellm:main-latest`.
:::

-</TabItem>
-<TabItem value="helm-" label="Helm Chart">

### Helm Chart

:::info
@@ -248,13 +297,9 @@ kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT

Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.

-</TabItem>
-</Tabs>

**That's it ! That's the quick start to deploy litellm**

-## Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl
#### Make LLM API Requests

:::info
💡 Go here 👉 [to make your first LLM API Request](user_keys)
@@ -263,7 +308,7 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M

:::

-## Options to deploy LiteLLM
## Deployment Options

| Docs | When to Use |
| ---- | ----------- |
@@ -272,8 +317,8 @@ LiteLLM is compatible with several SDKs - including OpenAI SDK, Anthropic SDK, M
| [LiteLLM container + Redis](#litellm-container--redis) | + load balance across multiple litellm containers |
| [LiteLLM Database container + PostgresDB + Redis](#litellm-database-container--postgresdb--redis) | + use Virtual Keys + Track Spend + load balance across multiple litellm containers |

-## Deploy with Database
-### Docker, Kubernetes, Helm Chart
### Deploy with Database
##### Docker, Kubernetes, Helm Chart

Requirements:
- Need a postgres database (e.g. [Supabase](https://supabase.com/), [Neon](https://neon.tech/), etc) Set `DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>` in your env
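A minimal sketch of such a database-backed run, combining the `DATABASE_URL` format above with the `litellm-database` image and config mount used elsewhere in this doc (all values are placeholders):

```shell
export DATABASE_URL="postgresql://<user>:<password>@<host>:<port>/<dbname>"

docker run --name litellm-proxy \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  -e DATABASE_URL=$DATABASE_URL \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm-database:main-latest --config /app/config.yaml
```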
@@ -491,7 +536,7 @@ Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`.
</TabItem>
</Tabs>

-## LiteLLM container + Redis
### Deploy with Redis
Use Redis when you need litellm to load balance across multiple litellm containers

The only change required is setting Redis on your `config.yaml`
@@ -523,7 +568,7 @@ Start docker container with config
docker run ghcr.io/berriai/litellm:main-latest --config your_config.yaml
```

-## LiteLLM Database container + PostgresDB + Redis
### Deploy with Database + Redis

The only change required is setting Redis on your `config.yaml`
LiteLLM Proxy supports sharing rpm/tpm across multiple litellm instances, pass `redis_host`, `redis_password` and `redis_port` to enable this. (LiteLLM will use Redis to track rpm/tpm usage)
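A sketch of the `config.yaml` addition this paragraph describes, using the `redis_host` / `redis_password` / `redis_port` parameters it names; the surrounding `router_settings` block is assumed here, so check the full doc for the exact placement:

```yaml
# sketch only: append to your_config.yaml so all litellm containers share rpm/tpm state
router_settings:
  redis_host: <your-redis-host>
  redis_password: <your-redis-password>
  redis_port: 6379
```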
@@ -558,7 +603,7 @@ docker run --name litellm-proxy \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```

-## LiteLLM without Internet Connection
### (Non Root) - without Internet Connection

By default `prisma generate` downloads [prisma's engine binaries](https://www.prisma.io/docs/orm/reference/environment-variables-reference#custom-engine-file-locations). This might cause errors when running without internet connection.
@@ -572,7 +617,7 @@ docker pull ghcr.io/berriai/litellm-non_root:main-stable

## Advanced Deployment Settings

-### 1. Customization of the server root path (custom Proxy base url)
### 1. Custom server root path (Proxy base url)

💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
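A sketch of running the proxy under a custom base path; the `SERVER_ROOT_PATH` environment variable is assumed here rather than shown in this diff, so verify the exact name against the full section:

```shell
# serve the proxy under /api/v1 instead of the root path (env var name assumed)
docker run \
  -e SERVER_ROOT_PATH="/api/v1" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest --config your_config.yaml
```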
@@ -670,7 +715,7 @@ After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (sinc

**That's it**, that's all you need to run the proxy on a custom root path

-### 2. Setting SSL Certification
### 2. SSL Certification

Use this, If you need to set ssl certificates for your on prem litellm proxy
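A sketch of passing certificates to the container; the `--ssl_keyfile_path` / `--ssl_certfile_path` flag names are assumed here rather than taken from this diff, so confirm them against the CLI docs linked earlier:

```shell
# mount local certs into the container and point the proxy at them (flag names assumed)
docker run -v $(pwd)/certs:/app/certs -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest \
  --ssl_keyfile_path /app/certs/keyfile.key \
  --ssl_certfile_path /app/certs/certfile.crt
```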
@@ -684,7 +729,7 @@ docker run ghcr.io/berriai/litellm:main-latest \

Provide an ssl certificate when starting litellm proxy server

-### 3. Using Http/2 with Hypercorn
### 3. Http/2 with Hypercorn

Use this if you want to run the proxy with hypercorn to support http/2
@@ -731,7 +776,7 @@ docker run \
--run_hypercorn
```

-### 4. Providing LiteLLM config.yaml file as a s3, GCS Bucket Object/url
### 4. config.yaml file on s3, GCS Bucket Object/url

Use this if you cannot mount a config file on your deployment service (example - AWS Fargate, Railway etc)
@@ -787,7 +832,7 @@ docker run --name litellm-proxy \
<Tabs>
<TabItem value="AWS EKS" label="AWS EKS - Kubernetes">

-### Kubernetes - Deploy on EKS
### Kubernetes (AWS EKS)

Step1. Create an EKS Cluster with the following spec
@@ -880,7 +925,7 @@ Once the container is running, you can access the application by going to `http:
</TabItem>
<TabItem value="google-cloud-run" label="Google Cloud Run">

-### Deploy on Google Cloud Run
### Google Cloud Run

1. Fork this repo - [github.com/BerriAI/example_litellm_gcp_cloud_run](https://github.com/BerriAI/example_litellm_gcp_cloud_run)
@@ -907,7 +952,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="render" label="Render deploy">

-### Deploy on Render https://render.com/
### Render

https://render.com/

<iframe width="840" height="500" src="https://www.loom.com/embed/805964b3c8384b41be180a61442389a3" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
@@ -916,7 +963,9 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
</TabItem>
<TabItem value="railway" label="Railway">

-### Deploy on Railway https://railway.app
### Railway

https://railway.app

**Step 1: Click the button** to deploy to Railway
@@ -930,7 +979,7 @@ curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \

## Extras

-### Run with docker compose
### Docker compose

**Step 1**
@@ -1,7 +1,9 @@
model_list:
-  - model_name: "fake-openai-endpoint"
  - model_name: "gpt-4"
    litellm_params:
-      model: openai/gpt-4o
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  alerting: ["slack"]