diff --git a/.gitignore b/.gitignore
index b31366a33..360bba798 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,6 @@ litellm/tests/langfuse.log
 litellm/tests/dynamo*.log
 .vscode/settings.json
 litellm/proxy/log.txt
+proxy_server_config_@.yaml
+.gitignore
+proxy_server_config_2.yaml
diff --git a/Dockerfile b/Dockerfile
index c9dba6615..b76aaf1d1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,35 +19,32 @@ RUN pip install --upgrade pip && \
     pip install build

 # Copy the current directory contents into the container at /app
-COPY requirements.txt .
+COPY . .

 # Build the package
 RUN rm -rf dist/* && python -m build

+# There should be only one wheel file now, assume the build only creates one
+RUN ls -1 dist/*.whl | head -1
+
 # Install the package
 RUN pip install dist/*.whl

-# Install any needed packages specified in requirements.txt
-RUN pip install wheel && \
-    pip wheel --no-cache-dir --wheel-dir=/app/wheels -r requirements.txt
-
-# Clear out any existing builds and build the package
-RUN rm -rf dist/* && python -m build
-
-# There should be only one wheel file now, assume the build only creates one
-RUN ls -1 dist/*.whl | head -1
+# install dependencies as wheels
+RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt

 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime

 WORKDIR /app

-# Depending on wheel naming patterns, use a wildcard if multiple versions are possible
 # Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
 COPY --from=builder /app/dist/*.whl .
+COPY --from=builder /wheels/ /wheels/

 # Install the built wheel using pip; again using a wildcard if it's the only file
-RUN pip install *.whl && rm -f *.whl
+RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
+

 EXPOSE 4000/tcp
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index b014667df..70036ae74 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -153,29 +153,29 @@ class AzureChatCompletion(BaseLLM):
                 "messages": messages,
                 **optional_params
             }
-            ## LOGGING
-            logging_obj.pre_call(
-                input=messages,
-                api_key=api_key,
-                additional_args={
-                    "headers": {
-                        "api_key": api_key,
-                        "azure_ad_token": azure_ad_token
-                    },
-                    "api_version": api_version,
-                    "api_base": api_base,
-                    "complete_input_dict": data,
-                },
-            )
             if acompletion is True:
                 if optional_params.get("stream", False):
                     return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
                 else:
-                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client, logging_obj=logging_obj)
             elif "stream" in optional_params and optional_params["stream"] == True:
                 return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
             else:
+                ## LOGGING
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=api_key,
+                    additional_args={
+                        "headers": {
+                            "api_key": api_key,
+                            "azure_ad_token": azure_ad_token
+                        },
+                        "api_version": api_version,
+                        "api_base": api_base,
+                        "complete_input_dict": data,
+                    },
+                )
                 if not isinstance(max_retries, int):
                     raise AzureOpenAIError(status_code=422, message="max retries must be an int")
                 # init AzureOpenAI Client
@@ -225,6 +225,7 @@ class AzureChatCompletion(BaseLLM):
                           model_response: ModelResponse,
                           azure_ad_token: Optional[str]=None,
                           client = None, # this is the AsyncAzureOpenAI
+                          logging_obj=None,
                           ):
         response = None
         try:
@@ -248,13 +249,19 @@ class AzureChatCompletion(BaseLLM):
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
                 azure_client = client
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data['messages'],
+                api_key=azure_client.api_key,
+                additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+            )
             response = await azure_client.chat.completions.create(**data)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
         except Exception as e:
-            raise e
+            raise AzureOpenAIError(status_code=500, message=str(e))

     def streaming(self,
                   logging_obj,
@@ -319,6 +326,12 @@ class AzureChatCompletion(BaseLLM):
             azure_client = AsyncAzureOpenAI(**azure_client_params)
         else:
             azure_client = client
+        ## LOGGING
+        logging_obj.pre_call(
+            input=data['messages'],
+            api_key=azure_client.api_key,
+            additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+        )
         response = await azure_client.chat.completions.create(**data)
         streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
         async for transformed_chunk in streamwrapper:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e942d4d41..13cdc2d02 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -18,7 +18,6 @@ try:
     import appdirs
     import backoff
     import yaml
-    import rq
     import orjson
 except ImportError:
     import sys
@@ -34,7 +33,6 @@ except ImportError:
             "appdirs",
             "backoff",
             "pyyaml",
-            "rq",
             "orjson"
         ]
     )
@@ -111,7 +109,6 @@ from fastapi.security.api_key import APIKeyHeader
 import json
 import logging
 from typing import Union
-# from litellm.proxy.queue import start_rq_worker_in_background

 app = FastAPI(docs_url="/", title="LiteLLM API")
 router = APIRouter()
diff --git a/requirements.txt b/requirements.txt
index 986580d7a..3cf315935 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,21 +1,26 @@
 # LITELLM PROXY DEPENDENCIES #
-openai>=1.0.0
-fastapi
-tomli
-pydantic>=2.5
-appdirs
-tomli_w
-backoff
-pyyaml
-uvicorn
-boto3
-redis
-pyyaml
-rq
-prisma
-celery
-psutil
-mangum
-google-generativeai
-traceloop-sdk==0.5.3
-langfuse==1.14.0
+anyio==4.2.0 # openai + http req.
+openai>=1.0.0 # openai req.
+fastapi # server dep
+pydantic>=2.5 # openai req.
+appdirs # server dep
+backoff # server dep
+pyyaml # server dep
+uvicorn # server dep
+boto3 # aws bedrock/sagemaker calls
+redis # caching
+prisma # for db
+mangum # for aws lambda functions
+google-generativeai # for vertex ai calls
+traceloop-sdk==0.5.3 # for open telemetry logging
+langfuse==1.14.0 # for langfuse self-hosted logging
+### LITELLM PACKAGE DEPENDENCIES
+python-dotenv>=0.2.0 # for env
+tiktoken>=0.4.0 # for calculating usage
+importlib-metadata>=6.8.0 # for random utils
+tokenizers # for calculating usage
+click # for proxy cli
+jinja2==3.1.2 # for prompt templates
+certifi>=2023.7.22 # [TODO] clean up
+aiohttp # for network calls
+####
\ No newline at end of file
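
For context on the azure.py hunks above: the `pre_call` logging hook now fires inside `acompletion` and `async_streaming` only after the `AsyncAzureOpenAI` client has been resolved, so the logged key and base URL match the client actually used, and unexpected failures are re-raised as `AzureOpenAIError`. Below is a minimal sketch of that pattern, not litellm's actual implementation: the `Logging` stub, the placeholder credentials/endpoint, and the simplified `acompletion` signature are assumptions made for illustration.

```python
# Minimal sketch of the pattern from litellm/llms/azure.py (illustrative only):
# resolve the async client first, log via pre_call() with that client's
# credentials, then wrap unexpected errors in AzureOpenAIError.
import json
from typing import Optional

from openai import AsyncAzureOpenAI


class AzureOpenAIError(Exception):
    """Stand-in for litellm's AzureOpenAIError."""
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)


class Logging:
    """Placeholder logging object with the pre_call() shape used in the diff."""
    def pre_call(self, input, api_key, additional_args=None):
        print("PRE-CALL", input, additional_args or {})


async def acompletion(data: dict, logging_obj: Logging, client: Optional[AsyncAzureOpenAI] = None):
    azure_client = client or AsyncAzureOpenAI(
        api_key="my-azure-key",                             # placeholder credentials
        api_version="2023-07-01-preview",
        azure_endpoint="https://example.openai.azure.com",  # placeholder endpoint
    )
    # Log with the resolved client's key, as the relocated pre_call() does
    logging_obj.pre_call(
        input=data["messages"],
        api_key=azure_client.api_key,
        additional_args={"acompletion": True, "complete_input_dict": data},
    )
    try:
        response = await azure_client.chat.completions.create(**data)
        return json.loads(response.model_dump_json())
    except AzureOpenAIError:
        raise
    except Exception as e:
        # Unexpected failures surface as AzureOpenAIError, mirroring the diff
        raise AzureOpenAIError(status_code=500, message=str(e))
```

Logging after client resolution means the Authorization header and api_base recorded by the logger reflect whichever client (caller-supplied or newly constructed) actually serves the request.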