refactor(openai_proxy -> litellm_server): renaming project for simplicity

This commit is contained in:
Krrish Dholakia 2023-10-25 14:14:32 -07:00
parent f6be642f2f
commit 16f39ec840
15 changed files with 1 addition and 1 deletion

View file

@@ -0,0 +1,40 @@
# Set AUTH STRATEGY for LLM APIs - defaults to using environment variables
AUTH_STRATEGY="ENV" # "ENV" or "DYNAMIC"; ENV always reads LLM API keys from environment variables, DYNAMIC reads request headers to set them (see the client sketch after this file)
OPENAI_API_KEY=""
HUGGINGFACE_API_KEY=""
TOGETHERAI_API_KEY=""
REPLICATE_API_KEY=""
## bedrock / sagemaker
AWS_ACCESS_KEY_ID=""
AWS_SECRET_ACCESS_KEY=""
## azure
AZURE_API_KEY=""
AZURE_API_BASE=""
AZURE_API_VERSION=""
ANTHROPIC_API_KEY=""
COHERE_API_KEY=""

## LOGGING ##
SET_VERBOSE="False" # set to "True" to see detailed input/output logs

### LANGFUSE
LANGFUSE_PUBLIC_KEY=""
LANGFUSE_SECRET_KEY=""
LANGFUSE_HOST="" # optional, defaults to https://cloud.langfuse.com

## CACHING ##
### REDIS
REDIS_HOST=""
REDIS_PORT=""
REDIS_PASSWORD=""
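For illustration, a minimal client sketch of the DYNAMIC strategy described above, using the pre-1.0 `openai` SDK in the same way as the test scripts later in this commit (the server URL and the Anthropic key are assumptions): with `AUTH_STRATEGY="DYNAMIC"`, `main.py` reads the key from the request's Authorization header and forwards it to the underlying LLM provider.

```python
import openai, os

openai.api_base = "http://0.0.0.0:8000"  # assumed: a locally running litellm_server instance
# With AUTH_STRATEGY="DYNAMIC", main.py strips "Bearer " from the Authorization header
# and passes the remaining value to litellm as api_key.
openai.api_key = os.getenv("ANTHROPIC_API_KEY")  # provider key supplied per request

response = openai.ChatCompletion.create(
    model="claude-instant-1",
    messages=[{"role": "user", "content": "Say this is a test!"}],
)
print(response)
```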

10
litellm_server/Dockerfile Normal file
View file

@@ -0,0 +1,10 @@
FROM python:3.10
ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt
EXPOSE $PORT
CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT

63
litellm_server/README.md Normal file
View file

@@ -0,0 +1,63 @@
# litellm-server
A simple, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
<p align="center" style="margin: 2%">
<a href="https://l.linklyhq.com/l/1uHsr" target="_blank">
<img src="https://render.com/images/deploy-to-render-button.svg" width="173"/>
</a>
<a href="https://l.linklyhq.com/l/1uHtX" target="_blank">
<img src="https://deploy.cloud.run/button.svg" width="200"/>
</a>
</p>
## Usage
```shell
docker run -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
# UVICORN: OpenAI Proxy running on http://0.0.0.0:8000
```
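Beyond curl, any OpenAI-compatible client can point at the proxy. A minimal Python sketch using the pre-1.0 `openai` SDK, mirroring the test scripts added in this commit (with the default `AUTH_STRATEGY="ENV"`, provider keys come from the server's environment, so the client key is a placeholder):
```python
import openai

openai.api_base = "http://0.0.0.0:8000"  # the proxy started above
openai.api_key = "anything"              # ignored by the proxy under AUTH_STRATEGY="ENV"

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say this is a test!"}],
)
print(response)
```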
## Endpoints:
- `/chat/completions` - chat completions endpoint to call 100+ LLMs
- `/router/completions` - for multiple deployments of the same model (e.g. Azure OpenAI); routes to the least-used deployment. [Learn more](https://docs.litellm.ai/docs/routing) - see the Python sketch after this list
- `/models` - available models on server
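For illustration, a hedged sketch of a `/router/completions` request with an inline `model_list` (the payload shape mirrors the router test added in this commit; the `requests` package and the Azure/OpenAI values are assumptions, and any HTTP client works):
```python
import os
import requests  # assumption: not pinned in requirements.txt

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "model_list": [  # two deployments of the same model; the router picks the least-used one
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "tpm": 240000,
            "rpm": 1800,
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")},
            "tpm": 1000000,
            "rpm": 9000,
        },
    ],
}

response = requests.post("http://0.0.0.0:8000/router/completions", json=payload)
print(response.json())
```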
## Making Requests to Proxy
### Curl
**Call OpenAI**
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
**Call Bedrock**
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "bedrock/anthropic.claude-instant-v1",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
### Running Locally
```shell
$ git clone https://github.com/BerriAI/litellm.git
$ cd ./litellm/litellm_server
$ uvicorn main:app --host 0.0.0.0 --port 8000
```
[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/simple_proxy)

View file

@@ -0,0 +1,2 @@
from .main import *
from .utils import *

0
litellm_server/config Normal file
View file

160
litellm_server/main.py Normal file
View file

@@ -0,0 +1,160 @@
import json
import os
import traceback
from typing import Optional

import dotenv
import litellm
from fastapi import FastAPI, Request, HTTPException
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware

try:
    from utils import set_callbacks, load_router_config
except ImportError:
    from litellm_server.utils import set_callbacks, load_router_config

dotenv.load_dotenv() # load env variables
app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
#### GLOBAL VARIABLES ####
llm_router: Optional[litellm.Router] = None
set_callbacks() # sets litellm callbacks for logging if they exist in the environment
llm_router = load_router_config(router=llm_router)
#### API ENDPOINTS ####
@router.post("/v1/models")
@router.get("/models") # if project requires model list
def model_list():
all_models = litellm.utils.get_valid_models()
return dict(
data=[
{
"id": model,
"object": "model",
"created": 1677610602,
"owned_by": "openai",
}
for model in all_models
],
object="list",
)
# for streaming
def data_generator(response):
print("inside generator")
for chunk in response:
print(f"returned chunk: {chunk}")
yield f"data: {json.dumps(chunk)}\n\n"
@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
data = await request.json()
response = litellm.completion(
**data
)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
@router.post("/v1/embeddings")
@router.post("/embeddings")
async def embedding(request: Request):
try:
data = await request.json()
        # default to the "ENV" auth strategy; only read headers when AUTH_STRATEGY == "DYNAMIC"
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # users can pass their LLM API key in the Authorization header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()
            if len(api_key) > 0:
                data["api_key"] = api_key
response = litellm.embedding(
**data
)
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
try:
data = await request.json()
        # default to the "ENV" auth strategy; only read headers when AUTH_STRATEGY == "DYNAMIC"
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # users can pass their LLM API key in the Authorization header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()
            if len(api_key) > 0:
                data["api_key"] = api_key
response = litellm.completion(
**data
)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
# raise HTTPException(status_code=500, detail=error_msg)
@router.post("/router/completions")
async def router_completion(request: Request):
global llm_router
try:
data = await request.json()
if "model_list" in data:
llm_router = litellm.Router(model_list=data.pop("model_list"))
if llm_router is None:
raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
        # openai.ChatCompletion.create replacement - forward the request body (model, messages, etc.) to the router
        response = await llm_router.acompletion(**data)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
@router.post("/router/embedding")
async def router_embedding(request: Request):
global llm_router
try:
data = await request.json()
if "model_list" in data:
llm_router = litellm.Router(model_list=data.pop("model_list"))
if llm_router is None:
raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
        # forward the request body to the router's embedding call
        response = await llm_router.aembedding(**data)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
@router.get("/")
async def home(request: Request):
return "LiteLLM: RUNNING"
app.include_router(router)

242
litellm_server/openapi.json Normal file
View file

@@ -0,0 +1,242 @@
{
"openapi": "3.0.0",
"info": {
"version": "1.0.0",
"title": "LiteLLM API",
"description": "API for LiteLLM"
},
"paths": {
"/chat/completions": {
"post": {
"summary": "Create chat completion for 100+ LLM APIs",
"requestBody": {
"description": "Input parameters for chat completions",
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsRequest"
},
"example": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "this is a test message from litellm proxy, can you ack"
}
],
"frequency_penalty": 0.0,
"max_tokens": 500,
"n": 1,
"presence_penalty": 0.0,
"stop": "###",
"stream": false,
"temperature": 0.7,
"top_p": 0.8,
"user": "test-litellm"
}
}
}
},
"responses": {
"200": {
"description": "Successful operation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsResponse"
},
"example": {
"object": "chat.completion",
"id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
"choices": [
{
"finish_reason": "stop_sequence",
"index": 0,
"message": {
"content": "I'm a large language model trained by OpenAI, ACK receiving this message",
"role": "assistant"
}
}
],
"created": 1698253693.169062,
"model": "gpt-3.5-turbo",
"usage": {
"prompt_tokens": 14,
"completion_tokens": 102,
"total_tokens": 116
}
}
}
}
},
"500": {
"description": "Server error"
}
}
}
},
"/models": {
"get": {
"summary": "Get models",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
},
"/": {
"get": {
"summary": "Swagger docs",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
}
},
"components": {
"schemas": {
"ChatCompletionsRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": {
"type": "string"
},
"content": {
"type": "string"
}
},
"required": ["role", "content"]
}
},
"model": {
"type": "string"
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"type": ["string", "object"]
},
"functions": {
"type": "array"
},
"logit_bias": {
"type": "object"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
}
},
"required": ["messages", "model"]
},
"ChatCompletionsResponse": {
"type": "object",
"properties": {
"object": {
"type": "string"
},
"choices": {
"type": "array",
"items": {
"type": "object",
"properties": {
"finish_reason": {
"type": "string"
},
"index": {
"type": "integer"
},
"message": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"role": {
"type": "string"
}
},
"required": ["content", "role"]
                }
              },
              "required": ["finish_reason", "index", "message"]
            }
          },
          "usage": {
            "type": "object",
            "properties": {
              "prompt_tokens": {
                "type": "integer"
              },
              "completion_tokens": {
                "type": "integer"
              },
              "total_tokens": {
                "type": "integer"
              }
            },
            "required": ["prompt_tokens", "completion_tokens", "total_tokens"]
          },
          "id": {
            "type": "string"
          },
          "created": {
            "type": "number"
          },
          "model": {
            "type": "string"
          }
        },
        "required": ["object", "choices", "id", "created", "model", "usage"]
}
}
}
}

View file

@@ -0,0 +1,7 @@
openai
fastapi
uvicorn
boto3
litellm
python-dotenv
redis

View file

@@ -0,0 +1,39 @@
import openai
openai.api_base = "http://0.0.0.0:8000"
print("making request")
openai.api_key = "anything" # this gets passed as a header
response = openai.ChatCompletion.create(
model = "bedrock/anthropic.claude-instant-v1",
messages = [
{
"role": "user",
"content": "this is a test message, what model / llm are you"
}
],
aws_access_key_id="",
aws_secret_access_key="",
aws_region_name="us-west-2",
max_tokens = 10,
)
print(response)
# response = openai.ChatCompletion.create(
# model = "gpt-3.5-turbo",
# messages = [
# {
# "role": "user",
# "content": "this is a test message, what model / llm are you"
# }
# ],
# max_tokens = 10,
# stream=True
# )
# for chunk in response:
# print(chunk)

View file

@@ -0,0 +1,80 @@
import openai, os, dotenv, traceback, time
openai.api_base = "http://0.0.0.0:8000"
dotenv.load_dotenv()
openai.api_key = os.getenv("ANTHROPIC_API_KEY") # this gets passed as a header
response1 = openai.ChatCompletion.create(
model = "claude-instant-1",
messages = [
{
"role": "user",
"content": "write a short poem about litellm"
}
],
)
try:
print(f"response: {response1['choices'][0]['message']['content']}")
except:
print(f"response: {response1}")
time.sleep(1) # allow time for request to be stored
response2 = openai.ChatCompletion.create(
model = "claude-instant-1",
messages = [
{
"role": "user",
"content": "write a short poem about litellm"
}
],
)
try:
print(f"response: {response2['choices'][0]['message']['content']}")
except:
print(f"response: {response2}")
openai.api_key = os.getenv("OPENAI_API_KEY")
try:
response3 = openai.ChatCompletion.create(
model = "gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "write a short poem about litellm"
}
],
)
except Exception as e:
traceback.print_exc()
try:
print(f"response: {response3['choices'][0]['message']['content']}")
except:
print(f"response: {response3}")
openai.api_key = os.getenv("ANTHROPIC_API_KEY") # this gets passed as a header
# switch caching off using cache flag
response4 = openai.ChatCompletion.create(
model = "claude-instant-1",
messages = [
{
"role": "user",
"content": "write a short poem about litellm"
}
],
caching = False,
)
try:
print(f"response: {response4['choices'][0]['message']['content']}")
except:
print(f"response: {response4}")
assert response1["choices"][0]["message"]["content"] == response2["choices"][0]["message"]["content"]
assert response1["choices"][0]["message"]["content"] != response4["choices"][0]["message"]["content"]
assert response1["choices"][0]["message"]["content"] != response3["choices"][0]["message"]["content"]

View file

@@ -0,0 +1,39 @@
import openai
openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"
print("making request")
api_key = ""
response = openai.ChatCompletion.create(
model = "gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test message, what model / llm are you"
}
],
api_key=api_key,
max_tokens = 10,
)
print(response)
response = openai.ChatCompletion.create(
model = "gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test message, what model / llm are you"
}
],
api_key=api_key,
max_tokens = 10,
stream=True
)
for chunk in response:
print(chunk)

View file

@@ -0,0 +1,38 @@
import openai
openai.api_base = "http://0.0.0.0:8000"
openai.api_key = "this can be anything"
print("making request")
api_key = ""
response = openai.ChatCompletion.create(
model = "openrouter/google/palm-2-chat-bison",
messages = [
{
"role": "user",
"content": "this is a test message, what model / llm are you"
}
],
api_key=api_key,
max_tokens = 10,
)
print(response)
response = openai.ChatCompletion.create(
model = "openrouter/google/palm-2-chat-bison",
messages = [
{
"role": "user",
"content": "this is a test message, what model / llm are you"
}
],
api_key=api_key,
max_tokens = 10,
stream=True
)
for chunk in response:
print(chunk)

View file

@@ -0,0 +1,59 @@
#### What this tests ####
# This tests calling batch_completions by running 100 messages together
import sys, os
import traceback, asyncio
import pytest
from fastapi.testclient import TestClient
from fastapi import Request
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm_server import app
def test_router_completion():
client = TestClient(app)
data = {
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Hey, how's it going?"}],
"model_list": [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
},
"tpm": 240000,
"rpm": 1800
}, {
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
},
"tpm": 240000,
"rpm": 1800
}, {
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"tpm": 1000000,
"rpm": 9000
}]
}
response = client.post("/router/completions", json=data)
print(f"response: {response.text}")
assert response.status_code == 200
response_data = response.json()
# Perform assertions on the response data
assert isinstance(response_data['choices'][0]['message']['content'], str)
test_router_completion()

70
litellm_server/utils.py Normal file
View file

@@ -0,0 +1,70 @@
import os, litellm
import pkg_resources
import yaml
import dotenv
from typing import Optional

dotenv.load_dotenv() # load env variables

def get_package_version(package_name):
    try:
        package = pkg_resources.get_distribution(package_name)
        return package.version
    except pkg_resources.DistributionNotFound:
        return None

# log the installed litellm version at startup
package_name = "litellm"
version = get_package_version(package_name)
if version:
    print(f"The version of {package_name} is {version}")
else:
    print(f"{package_name} is not installed")

def set_callbacks():
## LOGGING
if len(os.getenv("SET_VERBOSE", "")) > 0:
if os.getenv("SET_VERBOSE") == "True":
litellm.set_verbose = True
print("\033[92mLiteLLM: Switched on verbose logging\033[0m")
else:
litellm.set_verbose = False
### LANGFUSE
    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", "")) > 0) or len(os.getenv("LANGFUSE_HOST", "")) > 0:
litellm.success_callback = ["langfuse"]
print("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
## CACHING
### REDIS
if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
from litellm.caching import Cache
litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
def load_router_config(router: Optional[litellm.Router]):
    # reads an optional /app/config.yaml with "model_list" and "environment_variables" keys (see the example sketch below)
    config = {}
    config_file = '/app/config.yaml'
    try:
        if os.path.exists(config_file):
            with open(config_file, 'r') as file:
                config = yaml.safe_load(file)
        else:
            print(f"Config file '{config_file}' not found.")
    except Exception as e:
        print(f"Error loading config file '{config_file}': {e}")
## MODEL LIST
model_list = config.get('model_list', None)
if model_list:
router = litellm.Router(model_list=model_list)
## ENVIRONMENT VARIABLES
environment_variables = config.get('environment_variables', None)
if environment_variables:
for key, value in environment_variables.items():
os.environ[key] = value
return router
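For illustration, a hedged sketch of the config shape `load_router_config` consumes from `/app/config.yaml`, written here as Python around `yaml.safe_load` (the two top-level keys come from the function above; all model names, keys, and hosts are placeholders):

```python
import os
import yaml
import litellm

# Hypothetical /app/config.yaml contents, shown inline. The only top-level keys
# load_router_config reads are "model_list" and "environment_variables".
raw_config = """
model_list:
  - model_name: gpt-3.5-turbo            # alias clients send as "model"
    litellm_params:
      model: azure/chatgpt-v-2           # placeholder deployment
      api_key: my-azure-key              # placeholder
      api_base: https://example.openai.azure.com
environment_variables:
  REDIS_HOST: my-redis-host              # placeholder; exported to os.environ
"""

config = yaml.safe_load(raw_config)
for key, value in config.get("environment_variables", {}).items():
    os.environ[key] = value
router = litellm.Router(model_list=config["model_list"])  # same call load_router_config makes
```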