Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

fix(litellm_server): commenting out the code

Parent: 1976d0f7d6
Commit: 1ce505cbfb

8 changed files with 290 additions and 656 deletions
@@ -1,43 +1,43 @@ (environment-variable template for the server)

Previous content; in the new version every one of these lines is prefixed with `#`:

# set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys

OPENAI_API_KEY = ""

HUGGINGFACE_API_KEY=""

TOGETHERAI_API_KEY=""

REPLICATE_API_KEY=""

## bedrock / sagemaker
AWS_ACCESS_KEY_ID = ""
AWS_SECRET_ACCESS_KEY = ""

AZURE_API_KEY = ""
AZURE_API_BASE = ""
AZURE_API_VERSION = ""

ANTHROPIC_API_KEY = ""

COHERE_API_KEY = ""

## CONFIG FILE ##
# CONFIG_FILE_PATH = "" # uncomment to point to config file

## LOGGING ##

SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs

### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional

## CACHING ##

### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
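How the two `AUTH_STRATEGY` modes play out is easiest to see in miniature. The sketch below is illustrative only (the helper name and the OpenAI-only fallback are my own choices, not part of this commit); it mirrors the Authorization-header handling in the server module further down this diff.

```python
import os
from typing import Optional

def resolve_api_key(headers: dict) -> Optional[str]:
    # DYNAMIC: accept a per-request key from the Authorization header,
    # stripping the "Bearer " prefix the same way the server module does.
    if os.getenv("AUTH_STRATEGY", "ENV") == "DYNAMIC" and "authorization" in headers:
        api_key = headers["authorization"].replace("Bearer", "").strip()
        if api_key:
            return api_key
    # ENV (the default): fall back to whatever is set in the environment / .env file.
    return os.getenv("OPENAI_API_KEY")
```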
@@ -1,10 +1,10 @@ (Dockerfile)

Previous content; every line is commented out in the new version:

FROM python:3.10

ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt

EXPOSE $PORT

CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
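If you build and run this image, a quick way to confirm it came up is to hit the root and `/models` routes defined in the server module further down this diff. A minimal sketch; the base URL and the use of the `requests` package are assumptions, matching the `PORT=8000` mapping used in the README examples.

```python
import requests

BASE_URL = "http://0.0.0.0:8000"  # assumes `docker run -e PORT=8000 -p 8000:8000 ...`

print(requests.get(f"{BASE_URL}/").status_code)   # Swagger docs are served at "/" (docs_url="/"); expect 200
print(requests.get(f"{BASE_URL}/models").json())  # models known to the server
```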
@@ -1,124 +1,3 @@ (server README)

New content:

# litellm-server [experimental]

Deprecated. See litellm/proxy

Previous content:

# litellm-server [experimental]

Load balancer for multiple API Deployments (e.g. Azure/OpenAI)

<img width="1036" alt="Screenshot 2023-11-06 at 6 54 16 PM" src="https://github.com/BerriAI/litellm/assets/17561003/d32da338-1d72-45bb-bca8-ac70f1d3e980">

LiteLLM Server supports:

- LLM API Calls in the OpenAI ChatCompletions format
- Caching + Logging capabilities (Redis and Langfuse, respectively)
- Setting API keys in the request headers or in the .env

## Usage

```shell
docker run -e PORT=8000 -e OPENAI_API_KEY=<your-openai-key> -p 8000:8000 ghcr.io/berriai/litellm:latest
```

OpenAI Proxy running on http://0.0.0.0:8000

```shell
curl http://0.0.0.0:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
```

[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/providers)

## Endpoints:

- `/chat/completions` - chat completions endpoint to call 100+ LLMs
- `/models` - available models on server

## Save Model-specific params (API Base, API Keys, Temperature, etc.)

Use the [router_config_template.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) to save model-specific information like api_base, api_key, temperature, max_tokens, etc.

1. Create a `config.yaml` file

```yaml
model_list:
  - model_name: gpt-3.5-turbo # set model alias
    litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
      model: azure/chatgpt-v-2 # azure/<your-deployment-name> <- actual name used for litellm.completion()
      api_key: your_azure_api_key
      api_version: your_azure_api_version
      api_base: your_azure_api_base
  - model_name: mistral-7b
    litellm_params:
      model: ollama/mistral
      api_base: your_ollama_api_base
```

2. Start the server

```shell
docker run -e PORT=8000 -p 8000:8000 -v $(pwd)/config.yaml:/app/config.yaml ghcr.io/berriai/litellm:latest
```

## Caching

Add Redis Caching to your server via environment variables

```env
### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
```

Docker command:

```shell
docker run -e REDIS_HOST=<your-redis-host> -e REDIS_PORT=<your-redis-port> -e REDIS_PASSWORD=<your-redis-password> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```

## Logging

1. Debug Logs

Print the input/output params by setting `SET_VERBOSE = "True"`.

Docker command:

```shell
docker run -e SET_VERBOSE="True" -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```

Add Langfuse Logging to your server via environment variables

```env
### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional
```

Docker command:

```shell
docker run -e LANGFUSE_PUBLIC_KEY=<your-public-key> -e LANGFUSE_SECRET_KEY=<your-secret-key> -e LANGFUSE_HOST=<your-langfuse-host> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```

## Running Locally

```shell
$ git clone https://github.com/BerriAI/litellm.git
```

```shell
$ cd ./litellm/litellm_server
```

```shell
$ uvicorn main:app --host 0.0.0.0 --port 8000
```

### Custom Config

1. Create + Modify [router_config.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) (save your azure/openai/etc. deployment info)

```shell
cp ./router_config_template.yaml ./router_config.yaml
```

2. Build Docker Image

```shell
docker build -t litellm_server . --build-arg CONFIG_FILE=./router_config.yaml
```

3. Run Docker Image

```shell
docker run --name litellm_server -e PORT=8000 -p 8000:8000 litellm_server
```
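The curl example in the old README translates directly to Python. A minimal sketch, assuming the `requests` package and a server listening on port 8000 as in the README's docker command:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:8000/v1/chat/completions",
    headers={"Content-Type": "application/json"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "temperature": 0.7,
    },
)
print(resp.json())
```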
@@ -1,2 +1,2 @@ (package `__init__`)

Previous content; both imports are commented out in the new version:

from .main import *
from .server_utils import *
@@ -1,193 +1,193 @@ (FastAPI server module)

Previous content; in the new version every one of these lines is commented out with a leading `#`:

import os, traceback
from fastapi import FastAPI, Request, HTTPException
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json, sys
from typing import Optional
sys.path.insert(
    0, os.path.abspath("../")
)  # Adds the parent directory to the system path - for litellm local dev
import litellm

try:
    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
except ImportError:
    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
import dotenv
dotenv.load_dotenv()  # load env variables

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
#### GLOBAL VARIABLES ####
llm_router: Optional[litellm.Router] = None
llm_model_list: Optional[list] = None
server_settings: Optional[dict] = None

set_callbacks()  # sets litellm callbacks for logging if they exist in the environment

if "CONFIG_FILE_PATH" in os.environ:
    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
else:
    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
#### API ENDPOINTS ####
@router.get("/v1/models")
@router.get("/models")  # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    if llm_model_list:
        all_models += llm_model_list
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )

# for streaming
def data_generator(response):
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"

@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.post("/v1/embeddings")
@router.post("/embeddings")
async def embedding(request: Request):
    try:
        data = await request.json()
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers:  # if users pass LLM api keys as part of header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()  # type: ignore
            if len(api_key.strip()) > 0:
                api_key = api_key
                data["api_key"] = api_key
        response = litellm.embedding(
            **data
        )
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/v1/chat/completions")
@router.post("/chat/completions")
@router.post("/openai/deployments/{model:path}/chat/completions")  # azure compatible endpoint
async def chat_completion(request: Request, model: Optional[str] = None):
    global llm_model_list, server_settings
    try:
        data = await request.json()
        server_model = server_settings.get("completion_model", None) if server_settings else None
        data["model"] = server_model or model or data["model"]
        ## CHECK KEYS ##
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        # env_validation = litellm.validate_environment(model=data["model"])
        # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
        #     if "authorization" in request.headers:
        #         api_key = request.headers.get("authorization")
        #     elif "api-key" in request.headers:
        #         api_key = request.headers.get("api-key")
        #     print(f"api_key in headers: {api_key}")
        #     if " " in api_key:
        #         api_key = api_key.split(" ")[1]
        #     print(f"api_key split: {api_key}")
        #     if len(api_key) > 0:
        #         api_key = api_key
        #         data["api_key"] = api_key
        #     print(f"api_key in data: {api_key}")
        ## CHECK CONFIG ##
        if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
            for m in llm_model_list:
                if data["model"] == m["model_name"]:
                    for key, value in m["litellm_params"].items():
                        data[key] = value
                    break
        response = litellm.completion(
            **data
        )
        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        # return {"error": error_msg}
        raise HTTPException(status_code=500, detail=error_msg)

@router.post("/router/completions")
async def router_completion(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")

        # openai.ChatCompletion.create replacement
        response = await llm_router.acompletion(model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}])

        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/router/embedding")
async def router_embedding(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")

        response = await llm_router.aembedding(model="gpt-3.5-turbo",  # type: ignore
            messages=[{"role": "user", "content": "Hey, how's it going?"}])

        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.get("/")
async def home(request: Request):
    return "LiteLLM: RUNNING"


app.include_router(router)
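For context on the `/router/completions` route being commented out above: it builds a `litellm.Router` from a `model_list` passed in the request body, but as written it always sends a hard-coded prompt to `gpt-3.5-turbo` rather than the caller's `messages`. A hedged sketch of what a request to it looked like; the base URL and credential placeholders are illustrative, and the `requests` package is an assumption.

```python
import requests

payload = {
    # Consumed by the handler to build a litellm.Router for this request.
    "model_list": [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": "your_azure_api_key",
                "api_version": "your_azure_api_version",
                "api_base": "your_azure_api_base",
            },
        }
    ],
    # Note: the handler above ignores these and sends its own hard-coded message.
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
}

print(requests.post("http://0.0.0.0:8000/router/completions", json=payload).json())
```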
@@ -1,245 +0,0 @@ (OpenAPI specification, deleted)

Previous content:

{
  "openapi": "3.0.0",
  "info": {
    "version": "1.0.0",
    "title": "LiteLLM API",
    "description": "API for LiteLLM"
  },
  "paths": {
    "/chat/completions": {
      "post": {
        "summary": "Create chat completion for 100+ LLM APIs",
        "requestBody": {
          "description": "Input parameters for chat completions",
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/ChatCompletionsRequest" },
              "example": {
                "model": "gpt-3.5-turbo",
                "messages": [
                  { "role": "system", "content": "You are a helpful assistant." },
                  { "role": "user", "content": "this is a test message from litellm proxy, can you ack" }
                ],
                "frequency_penalty": 0.0,
                "max_tokens": 500,
                "n": 1,
                "presence_penalty": 0.0,
                "stop": "###",
                "stream": false,
                "temperature": 0.7,
                "top_p": 0.8,
                "user": "test-litellm"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ChatCompletionsResponse" },
                "example": {
                  "object": "chat.completion",
                  "id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
                  "choices": [
                    {
                      "finish_reason": "stop_sequence",
                      "index": 0,
                      "message": {
                        "content": "I'm a large language model trained by OpenAI, ACK receiving this message",
                        "role": "assistant"
                      }
                    }
                  ],
                  "created": 1698253693.169062,
                  "model": "gpt-3.5-turbo",
                  "usage": { "prompt_tokens": 14, "completion_tokens": 102, "total_tokens": 116 }
                }
              }
            }
          },
          "500": { "description": "Server error" }
        }
      }
    },
    "/models": {
      "get": {
        "summary": "Get models",
        "responses": { "200": { "description": "Successful operation" } }
      }
    },
    "/": {
      "get": {
        "summary": "Swagger docs",
        "responses": { "200": { "description": "Successful operation" } }
      }
    }
  },
  "components": {
    "schemas": {
      "ChatCompletionsRequest": {
        "type": "object",
        "properties": {
          "messages": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "role": { "type": "string" },
                "content": { "type": "string" }
              },
              "required": ["role", "content"]
            }
          },
          "model": { "type": "string" },
          "frequency_penalty": { "type": "number" },
          "function_call": { "type": ["string", "object"] },
          "functions": { "type": "array" },
          "logit_bias": { "type": "object" },
          "max_tokens": { "type": "integer" },
          "n": { "type": "integer" },
          "presence_penalty": { "type": "number" },
          "stop": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" } }
            ]
          },
          "stream": { "type": "boolean" },
          "temperature": { "type": "number" },
          "top_p": { "type": "number" },
          "user": { "type": "string" },
          "caching": { "type": "boolean" }
        },
        "required": ["messages", "model"]
      },
      "ChatCompletionsResponse": {
        "type": "object",
        "properties": {
          "object": { "type": "string" },
          "choices": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "finish_reason": { "type": "string" },
                "index": { "type": "integer" },
                "message": {
                  "type": "object",
                  "properties": {
                    "content": { "type": "string" },
                    "role": { "type": "string" }
                  },
                  "required": ["content", "role"]
                },
                "usage": {
                  "type": "object",
                  "properties": {
                    "prompt_tokens": { "type": "integer" },
                    "completion_tokens": { "type": "integer" },
                    "total_tokens": { "type": "integer" }
                  },
                  "required": ["prompt_tokens", "completion_tokens", "total_tokens"]
                }
              },
              "required": ["finish_reason", "index", "message", "usage"]
            }
          },
          "id": { "type": "string" },
          "created": { "type": "number" },
          "model": { "type": "string" }
        },
        "required": ["object", "choices", "id", "created", "model"]
      }
    }
  }
}
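The request/response schemas in the deleted spec can still be useful for client-side validation. A sketch using the third-party `jsonschema` package (an assumption; nothing in this repo depends on it) against a hypothetical local copy of the file:

```python
import json
from jsonschema import validate  # assumed third-party dependency

with open("openapi.json") as f:  # hypothetical local copy of the spec above
    spec = json.load(f)

request_schema = spec["components"]["schemas"]["ChatCompletionsRequest"]

# Raises jsonschema.exceptions.ValidationError if "model" or "messages" is missing.
validate(
    instance={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "this is a test message from litellm proxy, can you ack"}],
    },
    schema=request_schema,
)
```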
@@ -1,7 +1,7 @@ (requirements file)

Previous content; every dependency is commented out in the new version:

openai
fastapi
uvicorn
boto3
litellm
python-dotenv
redis
@@ -1,86 +1,86 @@ (server utilities module)

Previous content; in the new version every one of these lines is commented out with a leading `#`:

import os, litellm
import pkg_resources
import dotenv
dotenv.load_dotenv()  # load env variables

def print_verbose(print_statement):
    pass

def get_package_version(package_name):
    try:
        package = pkg_resources.get_distribution(package_name)
        return package.version
    except pkg_resources.DistributionNotFound:
        return None

# Usage example
package_name = "litellm"
version = get_package_version(package_name)
if version:
    print_verbose(f"The version of {package_name} is {version}")
else:
    print_verbose(f"{package_name} is not installed")
import yaml
import dotenv
from typing import Optional
dotenv.load_dotenv()  # load env variables

def set_callbacks():
    ## LOGGING
    if len(os.getenv("SET_VERBOSE", "")) > 0:
        if os.getenv("SET_VERBOSE") == "True":
            litellm.set_verbose = True
            print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
        else:
            litellm.set_verbose = False

    ### LANGFUSE
    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
        litellm.success_callback = ["langfuse"]
        print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")

    ## CACHING
    ### REDIS
    # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
    #     print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
    #     from litellm.caching import Cache
    #     litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
    #     print("\033[92mLiteLLM: Switched on Redis caching\033[0m")


def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
    config = {}
    server_settings = {}
    try:
        if os.path.exists(config_file_path):  # type: ignore
            with open(config_file_path, 'r') as file:  # type: ignore
                config = yaml.safe_load(file)
        else:
            pass
    except:
        pass

    ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
    server_settings = config.get("server_settings", None)
    if server_settings:
        server_settings = server_settings

    ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
    litellm_settings = config.get('litellm_settings', None)
    if litellm_settings:
        for key, value in litellm_settings.items():
            setattr(litellm, key, value)

    ## MODEL LIST
    model_list = config.get('model_list', None)
    if model_list:
        router = litellm.Router(model_list=model_list)

    ## ENVIRONMENT VARIABLES
    environment_variables = config.get('environment_variables', None)
    if environment_variables:
        for key, value in environment_variables.items():
            os.environ[key] = value

    return router, model_list, server_settings
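`load_router_config()` above recognizes four top-level keys in the YAML config. A sketch of a matching config, written inline so it can be sanity-checked with `yaml.safe_load`; the values are placeholders of my own choosing, not defaults shipped with litellm.

```python
import yaml

CONFIG_YAML = """
server_settings:
  completion_model: ollama/mistral        # optional default model for /chat/completions
litellm_settings:
  drop_params: true                       # any litellm module attribute can be set this way
environment_variables:
  SOME_PROVIDER_KEY: "placeholder-value"  # copied into os.environ by the loader
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_key: your_azure_api_key
      api_version: your_azure_api_version
      api_base: your_azure_api_base
"""

config = yaml.safe_load(CONFIG_YAML)
print(sorted(config))  # ['environment_variables', 'litellm_settings', 'model_list', 'server_settings']
```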