litellm/openai-proxy/main.py

import json
import os

import litellm
from fastapi import FastAPI, HTTPException, Request
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
if ("LANGUFSE_PUBLIC_KEY" in os.environ and "LANGUFSE_SECRET_KEY" in os.environ) or "LANGFUSE_HOST" in os.environ:
litellm.success_callback = ["langfuse"]
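
# Example (illustrative, not executed by this module): Langfuse logging is enabled by
# exporting these variables before starting the proxy, e.g.
#   export LANGFUSE_PUBLIC_KEY="pk-lf-..."
#   export LANGFUSE_SECRET_KEY="sk-lf-..."
#   export LANGFUSE_HOST="https://cloud.langfuse.com"  # optional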

#### API ENDPOINTS ####
@router.get("/v1/models")
@router.get("/models")  # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )
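
# Example usage (illustrative; assumes the proxy is running locally on port 8000):
#   curl http://localhost:8000/models
# which returns an OpenAI-style list: {"object": "list", "data": [{"id": "<model>", ...}, ...]}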

# for streaming: wrap each chunk returned by litellm as a server-sent event
def data_generator(response):
    print("inside generator")
    for chunk in response:
        print(f"returned chunk: {chunk}")
        yield f"data: {json.dumps(chunk)}\n\n"
@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
data = await request.json()
response = litellm.completion(
**data
)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
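
# Example usage (illustrative; model name and port are assumptions):
#   curl http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-3.5-turbo-instruct", "prompt": "Say hi", "stream": true}'
# With "stream": true the endpoint replies with server-sent events, one "data: {...}" line per chunk.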
@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
    try:
        data = await request.json()
        if "authorization" in request.headers:  # if users pass LLM api keys as part of header
            api_key = request.headers.get("authorization")
            api_key = api_key.split(" ")[1]
            ## check for special character - '|' <- used for bedrock (aws_access_key + "|" + aws_secret_access_key + "|" + aws_region_name)
            if "|" in api_key:  ## BEDROCK
                aws_keys = api_key.split("|")
                data["aws_access_key_id"] = aws_keys[0]
                data["aws_secret_access_key"] = aws_keys[1]
                data["aws_region_name"] = aws_keys[2]
            else:  ## ALL OTHER PROVIDERS
                data["api_key"] = api_key
        response = litellm.completion(**data)
        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
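
# Example usage (illustrative): provider credentials travel in the Authorization header.
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer sk-..." \
#     -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello"}]}'
# For Bedrock, the bearer token is the composite key described above:
#   "Authorization: Bearer <aws_access_key>|<aws_secret_access_key>|<aws_region_name>"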
@router.get("/")
async def home(request: Request):
return "LiteLLM: RUNNING"
app.include_router(router)
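
# To run locally (a minimal sketch, assuming this file is saved as main.py):
#   uvicorn main:app --host 0.0.0.0 --port 8000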