diff --git a/litellm/utils.py b/litellm/utils.py
index 05bc0d4fa..9c2611012 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -774,12 +774,13 @@ def client(original_function):
             if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                 litellm.cache = Cache()
 
-            if kwargs.get("caching", False): # allow users to control returning cached responses from the completion function
+            if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
                 # checking cache
                 if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
                     print_verbose(f"LiteLLM: Checking Cache")
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
                     if cached_result != None:
+                        print_verbose(f"Cache Hit!")
                         return cached_result
 
             # MODEL CALL
diff --git a/openai-proxy/.env.template b/openai-proxy/.env.template
index 0d4649d7b..292698126 100644
--- a/openai-proxy/.env.template
+++ b/openai-proxy/.env.template
@@ -1,5 +1,15 @@
 OPENAI_API_KEY = ""
+HUGGINGFACE_API_KEY=""
+
+TOGETHERAI_API_KEY=""
+
+REPLICATE_API_KEY=""
+
+## bedrock / sagemaker
+AWS_ACCESS_KEY_ID = ""
+AWS_SECRET_ACCESS_KEY = ""
+
 AZURE_API_KEY = ""
 AZURE_API_BASE = ""
 AZURE_API_VERSION = ""
 
@@ -8,3 +18,17 @@
 ANTHROPIC_API_KEY = ""
 
 COHERE_API_KEY = ""
+## LOGGING ##
+
+### LANGFUSE
+LANGFUSE_PUBLIC_KEY = ""
+LANGFUSE_SECRET_KEY = ""
+# Optional, defaults to https://cloud.langfuse.com
+LANGFUSE_HOST = "" # optional
+
+## CACHING ##
+
+### REDIS
+REDIS_HOST = ""
+REDIS_PORT = ""
+REDIS_PASSWORD = ""
\ No newline at end of file
diff --git a/openai-proxy/main.py b/openai-proxy/main.py
index 9bba0d96b..7d0937f67 100644
--- a/openai-proxy/main.py
+++ b/openai-proxy/main.py
@@ -1,4 +1,4 @@
-import litellm, os
+import litellm, os, traceback
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.routing import APIRouter
 from fastapi.responses import StreamingResponse, FileResponse
@@ -21,7 +21,6 @@ app.add_middleware(
     allow_headers=["*"],
 )
 set_callbacks() # sets litellm callbacks for logging if they exist in the environment
-
 #### API ENDPOINTS ####
 @router.post("/v1/models")
 @router.get("/models") # if project requires model list
@@ -65,8 +64,10 @@ async def chat_completion(request: Request):
         data = await request.json()
         if "authorization" in request.headers: # if users pass LLM api keys as part of header
             api_key = request.headers.get("authorization")
-            api_key = api_key.split(" ")[1]
-            data["api_key"] = api_key
+            api_key = api_key.replace("Bearer", "").strip()
+            if len(api_key.strip()) > 0:
+                api_key = api_key
+                data["api_key"] = api_key
         response = litellm.completion(
             **data
         )
@@ -74,7 +75,10 @@
             return StreamingResponse(data_generator(response), media_type='text/event-stream')
         return response
     except Exception as e:
-        return HTTPException(status_code=500, detail=str(e))
+        error_traceback = traceback.format_exc()
+        error_msg = f"{str(e)}\n\n{error_traceback}"
+        return {"error": error_msg}
+        # raise HTTPException(status_code=500, detail=error_msg)
 
 @router.get("/")
 async def home(request: Request):
diff --git a/openai-proxy/tests/test_caching.py b/openai-proxy/tests/test_caching.py
new file mode 100644
index 000000000..d6940e1da
--- /dev/null
+++ b/openai-proxy/tests/test_caching.py
@@ -0,0 +1,60 @@
+import openai, os, dotenv, traceback, time
+openai.api_base = "http://0.0.0.0:8000"
+dotenv.load_dotenv()
+openai.api_key = os.getenv("ANTHROPIC_API_KEY") # this gets passed as a header
+
+response1 = openai.ChatCompletion.create(
+    model = "claude-instant-1",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+)
+
+try:
+    print(f"response: {response1['choices'][0]['message']['content']}")
+except:
+    print(f"response: {response1}")
+
+time.sleep(1) # allow time for request to be stored
+
+response2 = openai.ChatCompletion.create(
+    model = "claude-instant-1",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+)
+
+try:
+    print(f"response: {response2['choices'][0]['message']['content']}")
+except:
+    print(f"response: {response2}")
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+try:
+    response3 = openai.ChatCompletion.create(
+        model = "gpt-3.5-turbo",
+        messages = [
+            {
+                "role": "user",
+                "content": "this is a test message, what model / llm are you"
+            }
+        ],
+    )
+except Exception as e:
+    traceback.print_exc()
+
+try:
+    print(f"response: {response3['choices'][0]['message']['content']}")
+except:
+    print(f"response: {response3}")
+
+assert response1["choices"][0]["message"]["content"] == response2["choices"][0]["message"]["content"]
+
+assert response1["choices"][0]["message"]["content"] != response3["choices"][0]["message"]["content"]
\ No newline at end of file
diff --git a/openai-proxy/utils.py b/openai-proxy/utils.py
index 3bf84bfbf..841ff8b87 100644
--- a/openai-proxy/utils.py
+++ b/openai-proxy/utils.py
@@ -3,5 +3,20 @@ import dotenv
 dotenv.load_dotenv() # load env variables
 
 def set_callbacks():
-    if ("LANGFUSE_PUBLIC_KEY" in os.environ and "LANGFUSE_SECRET_KEY" in os.environ) or "LANGFUSE_HOST" in os.environ:
+    ## LOGGING
+    ### LANGFUSE
+    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", "")) > 0) or len(os.getenv("LANGFUSE_HOST", "")) > 0:
+        print(f"sets langfuse integration")
         litellm.success_callback = ["langfuse"]
+
+    ## CACHING
+    ### REDIS
+    print(f"redis host: {len(os.getenv('REDIS_HOST', ''))}; redis port: {len(os.getenv('REDIS_PORT', ''))}; redis password: {len(os.getenv('REDIS_PASSWORD', ''))}")
+    if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
+        print(f"sets caching integration")
+        from litellm.caching import Cache
+        litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
+
+
+
+
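
Note (not part of the diff): a minimal sketch of how the Redis-backed cache wired up above is expected to behave once the variables from openai-proxy/.env.template are set. It assumes litellm is installed, a Redis instance is reachable, and a valid provider key is configured; the model name, prompt, and default host/port are placeholders, not values taken from this change.

import os
import litellm
from litellm.caching import Cache

# Build the same Redis cache that set_callbacks() constructs in openai-proxy/utils.py.
litellm.cache = Cache(
    type="redis",
    host=os.getenv("REDIS_HOST", "localhost"),
    port=os.getenv("REDIS_PORT", "6379"),
    password=os.getenv("REDIS_PASSWORD", ""),
)

messages = [{"role": "user", "content": "this is a test message, what model / llm are you"}]

# First call goes to the provider and the response is written to Redis.
first = litellm.completion(model="gpt-3.5-turbo", messages=messages)

# With the litellm/utils.py change above, litellm.cache being set is enough to
# check the cache, so an identical second call should return the stored response
# (and log "Cache Hit!" when verbose logging is enabled).
second = litellm.completion(model="gpt-3.5-turbo", messages=messages)

assert first["choices"][0]["message"]["content"] == second["choices"][0]["message"]["content"]

This mirrors what openai-proxy/tests/test_caching.py checks end-to-end through the proxy, but exercises the library directly.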