forked from phoenix/litellm-mirror
build(Dockerfile): fixing build requirements
commit 3923c389fd
parent 50b741f8fa
5 changed files with 66 additions and 51 deletions
.gitignore (vendored): 3 changes

@@ -25,3 +25,6 @@ litellm/tests/langfuse.log
 litellm/tests/dynamo*.log
 .vscode/settings.json
 litellm/proxy/log.txt
+proxy_server_config_@.yaml
+.gitignore
+proxy_server_config_2.yaml
Dockerfile: 21 changes

@@ -19,35 +19,32 @@ RUN pip install --upgrade pip && \
     pip install build

 # Copy the current directory contents into the container at /app
-COPY requirements.txt .
+COPY . .

 # Build the package
 RUN rm -rf dist/* && python -m build

+# There should be only one wheel file now, assume the build only creates one
+RUN ls -1 dist/*.whl | head -1
+
 # Install the package
 RUN pip install dist/*.whl

-# Install any needed packages specified in requirements.txt
-RUN pip install wheel && \
-    pip wheel --no-cache-dir --wheel-dir=/app/wheels -r requirements.txt
-
-# Clear out any existing builds and build the package
-RUN rm -rf dist/* && python -m build
-
-# There should be only one wheel file now, assume the build only creates one
-RUN ls -1 dist/*.whl | head -1
+# install dependencies as wheels
+RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt

 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime

 WORKDIR /app

-# Depending on wheel naming patterns, use a wildcard if multiple versions are possible
 # Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
 COPY --from=builder /app/dist/*.whl .
+COPY --from=builder /wheels/ /wheels/

 # Install the built wheel using pip; again using a wildcard if it's the only file
-RUN pip install *.whl && rm -f *.whl
+RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels


 EXPOSE 4000/tcp
@@ -153,6 +153,15 @@ class AzureChatCompletion(BaseLLM):
                 "messages": messages,
                 **optional_params
             }
+
+            if acompletion is True:
+                if optional_params.get("stream", False):
+                    return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+                else:
+                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client, logging_obj=logging_obj)
+            elif "stream" in optional_params and optional_params["stream"] == True:
+                return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+            else:
                 ## LOGGING
                 logging_obj.pre_call(
                     input=messages,
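The hunk above moves the async/streaming dispatch to the top of the completion path, so coroutine-returning calls exit before any synchronous client is constructed. A minimal sketch of that dispatch order, with illustrative names only (not the litellm implementation):

from typing import Any, Dict


def dispatch_sketch(optional_params: Dict[str, Any], acompletion: bool = False) -> str:
    # Mirrors the ordering above: async callers are routed first, then the
    # sync streaming path, and only the final else builds a synchronous client.
    if acompletion is True:
        if optional_params.get("stream", False):
            return "async_streaming"    # coroutine path in the real code
        return "acompletion"            # coroutine path in the real code
    elif optional_params.get("stream") == True:
        return "streaming"              # sync generator path
    else:
        return "sync_completion"        # builds the sync client


print(dispatch_sketch({"stream": True}, acompletion=True))   # async_streaming
print(dispatch_sketch({}))                                    # sync_completion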
@@ -167,15 +176,6 @@ class AzureChatCompletion(BaseLLM):
                         "complete_input_dict": data,
                     },
                 )
-
-            if acompletion is True:
-                if optional_params.get("stream", False):
-                    return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-                else:
-                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-            elif "stream" in optional_params and optional_params["stream"] == True:
-                return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-            else:
                 if not isinstance(max_retries, int):
                     raise AzureOpenAIError(status_code=422, message="max retries must be an int")
                 # init AzureOpenAI Client
@@ -225,6 +225,7 @@ class AzureChatCompletion(BaseLLM):
                     model_response: ModelResponse,
                     azure_ad_token: Optional[str]=None,
                     client = None, # this is the AsyncAzureOpenAI
+                    logging_obj=None,
                     ):
        response = None
        try:
@@ -248,13 +249,19 @@ class AzureChatCompletion(BaseLLM):
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
                 azure_client = client
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data['messages'],
+                api_key=azure_client.api_key,
+                additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+            )
             response = await azure_client.chat.completions.create(**data)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
         except Exception as e:
-            raise e
+            raise AzureOpenAIError(status_code=500, message=str(e))

     def streaming(self,
                   logging_obj,
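Two things change in the async completion path above: the request payload is logged via logging_obj.pre_call() before the await, and unexpected failures are re-raised as AzureOpenAIError with a 500 status instead of propagating raw. A hedged sketch of that log-then-wrap pattern, with illustrative names (not the litellm code):

import asyncio


class AzureOpenAIErrorSketch(Exception):
    # stand-in for litellm's AzureOpenAIError; carries an HTTP-style status
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


async def acompletion_sketch(create_coro, data: dict, log) -> dict:
    log("pre_call", data)                    # log the exact payload before awaiting
    try:
        return await create_coro(**data)     # the provider call
    except AzureOpenAIErrorSketch:
        raise                                # already normalized, pass through
    except Exception as e:
        # anything else becomes a single, typed 500 error
        raise AzureOpenAIErrorSketch(status_code=500, message=str(e))


async def _demo():
    async def failing(**_):
        raise ValueError("boom")
    try:
        await acompletion_sketch(failing, {"messages": []}, lambda *a: None)
    except AzureOpenAIErrorSketch as err:
        print(err.status_code, err.message)  # 500 boom


asyncio.run(_demo())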
@@ -319,6 +326,12 @@ class AzureChatCompletion(BaseLLM):
             azure_client = AsyncAzureOpenAI(**azure_client_params)
         else:
             azure_client = client
+        ## LOGGING
+        logging_obj.pre_call(
+            input=data['messages'],
+            api_key=azure_client.api_key,
+            additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+        )
         response = await azure_client.chat.completions.create(**data)
         streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
         async for transformed_chunk in streamwrapper:
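The streaming hunk adds the same pre_call logging before the awaited create() call; the raw response is then handed to CustomStreamWrapper and re-yielded chunk by chunk. A minimal sketch of that async re-yield pattern, assuming only that the wrapped stream is an async iterable (names are illustrative, not the litellm wrapper):

import asyncio
from typing import AsyncIterator


async def rewrap_stream(raw_chunks: AsyncIterator[str]) -> AsyncIterator[str]:
    # stand-in for the wrapper: transform each provider chunk as it arrives
    async for chunk in raw_chunks:
        yield chunk.upper()


async def _demo():
    async def fake_provider():
        for piece in ("hel", "lo"):
            yield piece
    async for out in rewrap_stream(fake_provider()):
        print(out)   # HEL, LO


asyncio.run(_demo())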
@@ -18,7 +18,6 @@ try:
     import appdirs
     import backoff
     import yaml
-    import rq
     import orjson
 except ImportError:
     import sys
@@ -34,7 +33,6 @@ except ImportError:
             "appdirs",
             "backoff",
             "pyyaml",
-            "rq",
             "orjson"
         ]
     )
@@ -111,7 +109,6 @@ from fastapi.security.api_key import APIKeyHeader
 import json
 import logging
 from typing import Union
-# from litellm.proxy.queue import start_rq_worker_in_background

 app = FastAPI(docs_url="/", title="LiteLLM API")
 router = APIRouter()
requirements.txt:

@@ -1,21 +1,26 @@
 # LITELLM PROXY DEPENDENCIES #
-openai>=1.0.0
-fastapi
-tomli
-pydantic>=2.5
-appdirs
-tomli_w
-backoff
-pyyaml
-uvicorn
-boto3
-redis
-pyyaml
-rq
-prisma
-celery
-psutil
-mangum
-google-generativeai
-traceloop-sdk==0.5.3
-langfuse==1.14.0
+anyio==4.2.0 # openai + http req.
+openai>=1.0.0 # openai req.
+fastapi # server dep
+pydantic>=2.5 # openai req.
+appdirs # server dep
+backoff # server dep
+pyyaml # server dep
+uvicorn # server dep
+boto3 # aws bedrock/sagemaker calls
+redis # caching
+prisma # for db
+mangum # for aws lambda functions
+google-generativeai # for vertex ai calls
+traceloop-sdk==0.5.3 # for open telemetry logging
+langfuse==1.14.0 # for langfuse self-hosted logging
+### LITELLM PACKAGE DEPENDENCIES
+python-dotenv>=0.2.0 # for env
+tiktoken>=0.4.0 # for calculating usage
+importlib-metadata>=6.8.0 # for random utils
+tokenizers # for calculating usage
+click # for proxy cli
+jinja2==3.1.2 # for prompt templates
+certifi>=2023.7.22 # [TODO] clean up
+aiohttp # for network calls
+####