build(Dockerfile): fixing build requirements

Krrish Dholakia 2023-12-16 17:52:30 -08:00
parent 50b741f8fa
commit 3923c389fd
5 changed files with 66 additions and 51 deletions

.gitignore

@@ -25,3 +25,6 @@ litellm/tests/langfuse.log
 litellm/tests/dynamo*.log
 .vscode/settings.json
 litellm/proxy/log.txt
+proxy_server_config_@.yaml
+.gitignore
+proxy_server_config_2.yaml

Dockerfile

@@ -19,35 +19,32 @@ RUN pip install --upgrade pip && \
     pip install build
 # Copy the current directory contents into the container at /app
-COPY requirements.txt .
+COPY . .
 # Build the package
 RUN rm -rf dist/* && python -m build
+# There should be only one wheel file now, assume the build only creates one
+RUN ls -1 dist/*.whl | head -1
 # Install the package
 RUN pip install dist/*.whl
-# Install any needed packages specified in requirements.txt
-RUN pip install wheel && \
-    pip wheel --no-cache-dir --wheel-dir=/app/wheels -r requirements.txt
-# Clear out any existing builds and build the package
-RUN rm -rf dist/* && python -m build
-# There should be only one wheel file now, assume the build only creates one
-RUN ls -1 dist/*.whl | head -1
+# install dependencies as wheels
+RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime
 WORKDIR /app
-# Depending on wheel naming patterns, use a wildcard if multiple versions are possible
 # Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
 COPY --from=builder /app/dist/*.whl .
+COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
-RUN pip install *.whl && rm -f *.whl
+RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 EXPOSE 4000/tcp
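
The net effect of the Dockerfile change: the builder stage now copies the full source tree, builds the litellm wheel, and pre-builds wheels for everything in requirements.txt, while the runtime stage installs only from those wheels with --no-index. A rough local sketch of that two-step wheel flow in Python follows; the wheels/ directory and requirements.txt path are illustrative assumptions, not paths baked into the image.

import subprocess
import sys
from pathlib import Path

WHEEL_DIR = Path("wheels")  # stand-in for /wheels/ in the builder stage

def build_wheels(requirements: str = "requirements.txt") -> None:
    # Mirrors: RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
    WHEEL_DIR.mkdir(exist_ok=True)
    subprocess.check_call([
        sys.executable, "-m", "pip", "wheel",
        "--no-cache-dir", f"--wheel-dir={WHEEL_DIR}", "-r", requirements,
    ])

def install_offline() -> None:
    # Mirrors: RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/
    wheels = [str(p) for p in WHEEL_DIR.glob("*.whl")]
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "--no-index", f"--find-links={WHEEL_DIR}", *wheels,
    ])

if __name__ == "__main__":
    build_wheels()
    install_offline()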

litellm/llms/azure.py

@@ -153,6 +153,15 @@ class AzureChatCompletion(BaseLLM):
                 "messages": messages,
                 **optional_params
             }
+            if acompletion is True:
+                if optional_params.get("stream", False):
+                    return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+                else:
+                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client, logging_obj=logging_obj)
+            elif "stream" in optional_params and optional_params["stream"] == True:
+                return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+            else:
                 ## LOGGING
                 logging_obj.pre_call(
                     input=messages,
@@ -167,15 +176,6 @@ class AzureChatCompletion(BaseLLM):
                         "complete_input_dict": data,
                     },
                 )
-            if acompletion is True:
-                if optional_params.get("stream", False):
-                    return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-                else:
-                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-            elif "stream" in optional_params and optional_params["stream"] == True:
-                return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
-            else:
                 if not isinstance(max_retries, int):
                     raise AzureOpenAIError(status_code=422, message="max retries must be an int")
                 # init AzureOpenAI Client
@@ -225,6 +225,7 @@ class AzureChatCompletion(BaseLLM):
                           model_response: ModelResponse,
                           azure_ad_token: Optional[str]=None,
                           client = None, # this is the AsyncAzureOpenAI
+                          logging_obj=None,
                           ):
         response = None
         try:
@@ -248,13 +249,19 @@
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
                 azure_client = client
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data['messages'],
+                api_key=azure_client.api_key,
+                additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+            )
             response = await azure_client.chat.completions.create(**data)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
         except Exception as e:
-            raise e
+            raise AzureOpenAIError(status_code=500, message=str(e))

     def streaming(self,
                   logging_obj,
@@ -319,6 +326,12 @@
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
                 azure_client = client
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data['messages'],
+                api_key=azure_client.api_key,
+                additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+            )
             response = await azure_client.chat.completions.create(**data)
             streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
             async for transformed_chunk in streamwrapper:
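
Taken together, the azure.py changes move the async and streaming dispatch ahead of the synchronous pre-call logging, pass logging_obj into acompletion(), run pre_call() inside each async path with the client that was actually constructed, and wrap unexpected acompletion failures in AzureOpenAIError(status_code=500, ...). A condensed, runnable sketch of that control flow is below; the class and helper names are illustrative stand-ins, not litellm's API, and the streaming branches are omitted for brevity.

import asyncio
from typing import Any, Dict


class DummyLogger:
    # Stand-in for litellm's logging object; it only records the pre-call.
    def pre_call(self, input: Any, api_key: str, additional_args: Dict[str, Any]) -> None:
        print(f"pre_call (acompletion={additional_args.get('acompletion', False)})")


class ChatHandler:
    def completion(self, data: Dict[str, Any], logging_obj: DummyLogger,
                   acompletion: bool = False):
        if acompletion:
            # Async requests are routed out *before* the sync logging runs,
            # so the pre-call is emitted exactly once, inside the async path.
            return self.acompletion(data, logging_obj)
        # Synchronous, non-streaming path: log, then call.
        logging_obj.pre_call(input=data["messages"], api_key="sk-...",
                             additional_args={"complete_input_dict": data})
        return {"object": "chat.completion", "echo": data["messages"]}

    async def acompletion(self, data: Dict[str, Any], logging_obj: DummyLogger):
        try:
            # The async path logs with the (async) client it actually built.
            logging_obj.pre_call(input=data["messages"], api_key="sk-...",
                                 additional_args={"acompletion": True,
                                                  "complete_input_dict": data})
            await asyncio.sleep(0)  # stand-in for the awaited Azure call
            return {"object": "chat.completion", "echo": data["messages"]}
        except Exception as e:
            # Mirrors the diff's switch from a bare `raise e` to a
            # provider-specific 500-style error.
            raise RuntimeError(f"status_code=500: {e}") from e


if __name__ == "__main__":
    handler = ChatHandler()
    payload = {"messages": [{"role": "user", "content": "hi"}]}
    print(handler.completion(payload, DummyLogger()))
    print(asyncio.run(handler.completion(payload, DummyLogger(), acompletion=True)))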

litellm/proxy/proxy_server.py

@@ -18,7 +18,6 @@ try:
     import appdirs
     import backoff
     import yaml
-    import rq
     import orjson
 except ImportError:
     import sys
@@ -34,7 +33,6 @@ except ImportError:
             "appdirs",
             "backoff",
             "pyyaml",
-            "rq",
             "orjson"
         ]
     )
@@ -111,7 +109,6 @@ from fastapi.security.api_key import APIKeyHeader
 import json
 import logging
 from typing import Union
-# from litellm.proxy.queue import start_rq_worker_in_background
 app = FastAPI(docs_url="/", title="LiteLLM API")
 router = APIRouter()

requirements.txt

@@ -1,21 +1,26 @@
 # LITELLM PROXY DEPENDENCIES #
-openai>=1.0.0
-fastapi
-tomli
-pydantic>=2.5
-appdirs
-tomli_w
-backoff
-pyyaml
-uvicorn
-boto3
-redis
-pyyaml
-rq
-prisma
-celery
-psutil
-mangum
-google-generativeai
-traceloop-sdk==0.5.3
-langfuse==1.14.0
+anyio==4.2.0 # openai + http req.
+openai>=1.0.0 # openai req.
+fastapi # server dep
+pydantic>=2.5 # openai req.
+appdirs # server dep
+backoff # server dep
+pyyaml # server dep
+uvicorn # server dep
+boto3 # aws bedrock/sagemaker calls
+redis # caching
+prisma # for db
+mangum # for aws lambda functions
+google-generativeai # for vertex ai calls
+traceloop-sdk==0.5.3 # for open telemetry logging
+langfuse==1.14.0 # for langfuse self-hosted logging
+### LITELLM PACKAGE DEPENDENCIES
+python-dotenv>=0.2.0 # for env
+tiktoken>=0.4.0 # for calculating usage
+importlib-metadata>=6.8.0 # for random utils
+tokenizers # for calculating usage
+click # for proxy cli
+jinja2==3.1.2 # for prompt templates
+certifi>=2023.7.22 # [TODO] clean up
+aiohttp # for network calls
+####
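
The reorganised requirements.txt now annotates each dependency and separates proxy-only dependencies from the litellm package dependencies with the ### LITELLM PACKAGE DEPENDENCIES / #### markers. A small, hedged sketch of how those markers could be consumed to split the file into its two sections; the file path is an assumption, while the marker strings come from the diff above.

from pathlib import Path
from typing import Dict, List

PACKAGE_MARKER = "### LITELLM PACKAGE DEPENDENCIES"

def split_requirements(path: str = "requirements.txt") -> Dict[str, List[str]]:
    sections: Dict[str, List[str]] = {"proxy": [], "package": []}
    current = "proxy"
    for raw in Path(path).read_text().splitlines():
        line = raw.strip()
        if not line:
            continue
        if line.startswith(PACKAGE_MARKER):
            current = "package"  # everything after the marker is a package dep
            continue
        if line.startswith("#"):
            continue  # header, trailing ####, and other comment-only lines
        # Keep the requirement specifier, drop the trailing "# ..." annotation.
        sections[current].append(line.split("#", 1)[0].strip())
    return sections

if __name__ == "__main__":
    for name, requirements in split_requirements().items():
        print(name, requirements)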