fix(litellm_server): commenting out the code

Krrish Dholakia 2023-11-20 15:39:05 -08:00
parent 1976d0f7d6
commit 1ce505cbfb
8 changed files with 290 additions and 656 deletions


@@ -1,43 +1,43 @@
-# set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
+# # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
-AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
+# AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
-OPENAI_API_KEY = ""
+# OPENAI_API_KEY = ""
-HUGGINGFACE_API_KEY=""
+# HUGGINGFACE_API_KEY=""
-TOGETHERAI_API_KEY=""
+# TOGETHERAI_API_KEY=""
-REPLICATE_API_KEY=""
+# REPLICATE_API_KEY=""
-## bedrock / sagemaker
+# ## bedrock / sagemaker
-AWS_ACCESS_KEY_ID = ""
+# AWS_ACCESS_KEY_ID = ""
-AWS_SECRET_ACCESS_KEY = ""
+# AWS_SECRET_ACCESS_KEY = ""
-AZURE_API_KEY = ""
+# AZURE_API_KEY = ""
-AZURE_API_BASE = ""
+# AZURE_API_BASE = ""
-AZURE_API_VERSION = ""
+# AZURE_API_VERSION = ""
-ANTHROPIC_API_KEY = ""
+# ANTHROPIC_API_KEY = ""
-COHERE_API_KEY = ""
+# COHERE_API_KEY = ""
-## CONFIG FILE ##
+# ## CONFIG FILE ##
-# CONFIG_FILE_PATH = "" # uncomment to point to config file
+# # CONFIG_FILE_PATH = "" # uncomment to point to config file
-## LOGGING ##
+# ## LOGGING ##
-SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
+# SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
-### LANGFUSE
+# ### LANGFUSE
-LANGFUSE_PUBLIC_KEY = ""
+# LANGFUSE_PUBLIC_KEY = ""
-LANGFUSE_SECRET_KEY = ""
+# LANGFUSE_SECRET_KEY = ""
-# Optional, defaults to https://cloud.langfuse.com
+# # Optional, defaults to https://cloud.langfuse.com
-LANGFUSE_HOST = "" # optional
+# LANGFUSE_HOST = "" # optional
-## CACHING ##
+# ## CACHING ##
-### REDIS
+# ### REDIS
-REDIS_HOST = ""
+# REDIS_HOST = ""
-REDIS_PORT = ""
+# REDIS_PORT = ""
-REDIS_PASSWORD = ""
+# REDIS_PASSWORD = ""


@@ -1,10 +1,10 @@
-FROM python:3.10
+# FROM python:3.10
-ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
+# ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
-COPY . /app
+# COPY . /app
-WORKDIR /app
+# WORKDIR /app
-RUN pip install -r requirements.txt
+# RUN pip install -r requirements.txt
-EXPOSE $PORT
+# EXPOSE $PORT
-CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
+# CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10


@@ -1,124 +1,3 @@
# litellm-server [experimental]
-Load balancer for multiple API Deployments (eg. Azure/OpenAI)
+Deprecated. See litellm/proxy
<img width="1036" alt="Screenshot 2023-11-06 at 6 54 16 PM" src="https://github.com/BerriAI/litellm/assets/17561003/d32da338-1d72-45bb-bca8-ac70f1d3e980">
LiteLLM Server supports:
- LLM API Calls in the OpenAI ChatCompletions format
- Caching + Logging capabilities (Redis and Langfuse, respectively)
- Setting API keys in the request headers or in the .env
## Usage
```shell
docker run -e PORT=8000 -e OPENAI_API_KEY=<your-openai-key> -p 8000:8000 ghcr.io/berriai/litellm:latest
```
The OpenAI-compatible proxy is now running on http://0.0.0.0:8000. Test it with:
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/providers)
## Endpoints:
- `/chat/completions` - chat completions endpoint to call 100+ LLMs
- `/models` - available models on server
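
Both endpoints speak the OpenAI wire format, so any HTTP client works. As a minimal illustration (assuming the server is running locally on port 8000 and that the `requests` package is installed — it is not part of this repo's `requirements.txt`):

```python
import requests  # assumed extra dependency: pip install requests

BASE_URL = "http://0.0.0.0:8000"

# list the models the server exposes (GET /models)
models = requests.get(f"{BASE_URL}/models").json()
print([m["id"] for m in models["data"]])

# call the chat completions endpoint (POST /chat/completions)
resp = requests.post(
    f"{BASE_URL}/chat/completions",
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "temperature": 0.7,
    },
)
print(resp.json())
```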
## Save Model-specific params (API Base, API Keys, Temperature, etc.)
Use the [router_config_template.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) to save model-specific information like api_base, api_key, temperature, max_tokens, etc.
1. Create a `config.yaml` file
```yaml
model_list:
- model_name: gpt-3.5-turbo # set model alias
litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
model: azure/chatgpt-v-2 # azure/<your-deployment-name> <- actual name used for litellm.completion()
api_key: your_azure_api_key
api_version: your_azure_api_version
api_base: your_azure_api_base
- model_name: mistral-7b
litellm_params:
model: ollama/mistral
api_base: your_ollama_api_base
```
2. Start the server
```shell
docker run -e PORT=8000 -p 8000:8000 -v $(pwd)/config.yaml:/app/config.yaml ghcr.io/berriai/litellm:latest
```
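
Requests then select a deployment by its `model_name` alias from `config.yaml`. A minimal sketch (same assumptions as above: local server on port 8000, `requests` installed) calling the `mistral-7b` alias from the example config:

```python
import requests  # assumed extra dependency

resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={
        # model_name alias from config.yaml; the server fills in litellm_params
        "model": "mistral-7b",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(resp.json())
```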
## Caching
Add Redis Caching to your server via environment variables
```env
### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
```
Docker command:
```shell
docker run -e REDIS_HOST=<your-redis-host> -e REDIS_PORT=<your-redis-port> -e REDIS_PASSWORD=<your-redis-password> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
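
Under the hood, these variables point `litellm.cache` at Redis. A rough sketch based on the (currently commented-out) wiring in `server_utils.py` — treat the exact arguments as indicative rather than guaranteed:

```python
import os
import litellm
from litellm.caching import Cache

# enable Redis-backed response caching when all REDIS_* variables are set
if os.getenv("REDIS_HOST") and os.getenv("REDIS_PORT") and os.getenv("REDIS_PASSWORD"):
    litellm.cache = Cache(
        type="redis",
        host=os.getenv("REDIS_HOST"),
        port=os.getenv("REDIS_PORT"),
        password=os.getenv("REDIS_PASSWORD"),
    )
```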
## Logging
1. Debug Logs
Print the input/output params by setting `SET_VERBOSE = "True"`.
Docker command:
```shell
docker run -e SET_VERBOSE="True" -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
Add Langfuse Logging to your server via environment variables
```env
### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional
```
Docker command:
```shell
docker run -e LANGFUSE_PUBLIC_KEY=<your-public-key> -e LANGFUSE_SECRET_KEY=<your-secret-key> -e LANGFUSE_HOST=<your-langfuse-host> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
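
Internally, Langfuse logging just means registering the callback on litellm when these keys are present — roughly what `set_callbacks()` in `server_utils.py` does:

```python
import os
import litellm

# mirrors set_callbacks() in server_utils.py: turn on the Langfuse success callback
if os.getenv("LANGFUSE_PUBLIC_KEY") and os.getenv("LANGFUSE_SECRET_KEY"):
    litellm.success_callback = ["langfuse"]
```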
## Running Locally
```shell
$ git clone https://github.com/BerriAI/litellm.git
```
```shell
$ cd ./litellm/litellm_server
```
```shell
$ uvicorn main:app --host 0.0.0.0 --port 8000
```
### Custom Config
1. Create + Modify [router_config.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) (save your azure/openai/etc. deployment info)
```shell
cp ./router_config_template.yaml ./router_config.yaml
```
2. Build Docker Image
```shell
docker build -t litellm_server . --build-arg CONFIG_FILE=./router_config.yaml
```
3. Run Docker Image
```shell
docker run --name litellm_server -e PORT=8000 -p 8000:8000 litellm_server
```


@@ -1,2 +1,2 @@
-from .main import *
+# from .main import *
-from .server_utils import *
+# from .server_utils import *


@@ -1,193 +1,193 @@
import os, traceback
from fastapi import FastAPI, Request, HTTPException
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json, sys
from typing import Optional
sys.path.insert(
    0, os.path.abspath("../")
)  # Adds the parent directory to the system path - for litellm local dev
import litellm

try:
    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
except ImportError:
    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
import dotenv
dotenv.load_dotenv()  # load env variables

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
#### GLOBAL VARIABLES ####
llm_router: Optional[litellm.Router] = None
llm_model_list: Optional[list] = None
server_settings: Optional[dict] = None

set_callbacks()  # sets litellm callbacks for logging if they exist in the environment

if "CONFIG_FILE_PATH" in os.environ:
    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
else:
    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)

#### API ENDPOINTS ####
@router.get("/v1/models")
@router.get("/models")  # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    if llm_model_list:
        all_models += llm_model_list
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )

# for streaming
def data_generator(response):
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"

@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.post("/v1/embeddings")
@router.post("/embeddings")
async def embedding(request: Request):
    try:
        data = await request.json()
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers:  # if users pass LLM api keys as part of header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()  # type: ignore
            if len(api_key.strip()) > 0:
                api_key = api_key
                data["api_key"] = api_key
        response = litellm.embedding(
            **data
        )
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/v1/chat/completions")
@router.post("/chat/completions")
@router.post("/openai/deployments/{model:path}/chat/completions")  # azure compatible endpoint
async def chat_completion(request: Request, model: Optional[str] = None):
    global llm_model_list, server_settings
    try:
        data = await request.json()
        server_model = server_settings.get("completion_model", None) if server_settings else None
        data["model"] = server_model or model or data["model"]
        ## CHECK KEYS ##
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        # env_validation = litellm.validate_environment(model=data["model"])
        # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
        #     if "authorization" in request.headers:
        #         api_key = request.headers.get("authorization")
        #     elif "api-key" in request.headers:
        #         api_key = request.headers.get("api-key")
        #     print(f"api_key in headers: {api_key}")
        #     if " " in api_key:
        #         api_key = api_key.split(" ")[1]
        #         print(f"api_key split: {api_key}")
        #     if len(api_key) > 0:
        #         api_key = api_key
        #         data["api_key"] = api_key
        #     print(f"api_key in data: {api_key}")
        ## CHECK CONFIG ##
        if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
            for m in llm_model_list:
                if data["model"] == m["model_name"]:
                    for key, value in m["litellm_params"].items():
                        data[key] = value
                    break
        response = litellm.completion(
            **data
        )
        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        # return {"error": error_msg}
        raise HTTPException(status_code=500, detail=error_msg)

@router.post("/router/completions")
async def router_completion(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
        # openai.ChatCompletion.create replacement
        response = await llm_router.acompletion(model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": "Hey, how's it going?"}])
        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/router/embedding")
async def router_embedding(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
        response = await llm_router.aembedding(model="gpt-3.5-turbo",  # type: ignore
                    messages=[{"role": "user", "content": "Hey, how's it going?"}])
        if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.get("/")
async def home(request: Request):
    return "LiteLLM: RUNNING"

app.include_router(router)


@@ -1,245 +0,0 @@
{
"openapi": "3.0.0",
"info": {
"version": "1.0.0",
"title": "LiteLLM API",
"description": "API for LiteLLM"
},
"paths": {
"/chat/completions": {
"post": {
"summary": "Create chat completion for 100+ LLM APIs",
"requestBody": {
"description": "Input parameters for chat completions",
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsRequest"
},
"example": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "this is a test message from litellm proxy, can you ack"
}
],
"frequency_penalty": 0.0,
"max_tokens": 500,
"n": 1,
"presence_penalty": 0.0,
"stop": "###",
"stream": false,
"temperature": 0.7,
"top_p": 0.8,
"user": "test-litellm"
}
}
}
},
"responses": {
"200": {
"description": "Successful operation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsResponse"
},
"example": {
"object": "chat.completion",
"id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
"choices": [
{
"finish_reason": "stop_sequence",
"index": 0,
"message": {
"content": "I'm a large language model trained by OpenAI, ACK receiving this message",
"role": "assistant"
}
}
],
"created": 1698253693.169062,
"model": "gpt-3.5-turbo",
"usage": {
"prompt_tokens": 14,
"completion_tokens": 102,
"total_tokens": 116
}
}
}
}
},
"500": {
"description": "Server error"
}
}
}
},
"/models": {
"get": {
"summary": "Get models",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
},
"/": {
"get": {
"summary": "Swagger docs",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
}
},
"components": {
"schemas": {
"ChatCompletionsRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": {
"type": "string"
},
"content": {
"type": "string"
}
},
"required": ["role", "content"]
}
},
"model": {
"type": "string"
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"type": ["string", "object"]
},
"functions": {
"type": "array"
},
"logit_bias": {
"type": "object"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
},
"caching": {
"type": "boolean"
}
},
"required": ["messages", "model"]
},
"ChatCompletionsResponse": {
"type": "object",
"properties": {
"object": {
"type": "string"
},
"choices": {
"type": "array",
"items": {
"type": "object",
"properties": {
"finish_reason": {
"type": "string"
},
"index": {
"type": "integer"
},
"message": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"role": {
"type": "string"
}
},
"required": ["content", "role"]
},
"usage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer"
},
"completion_tokens": {
"type": "integer"
},
"total_tokens": {
"type": "integer"
}
},
"required": ["prompt_tokens", "completion_tokens", "total_tokens"]
}
},
"required": ["finish_reason", "index", "message", "usage"]
}
},
"id": {
"type": "string"
},
"created": {
"type": "number"
},
"model": {
"type": "string"
}
},
"required": ["object", "choices", "id", "created", "model"]
}
}
}
}


@@ -1,7 +1,7 @@
-openai
+# openai
-fastapi
+# fastapi
-uvicorn
+# uvicorn
-boto3
+# boto3
-litellm
+# litellm
-python-dotenv
+# python-dotenv
-redis
+# redis


@@ -1,86 +1,86 @@
import os, litellm
import pkg_resources
import dotenv
dotenv.load_dotenv()  # load env variables

def print_verbose(print_statement):
    pass

def get_package_version(package_name):
    try:
        package = pkg_resources.get_distribution(package_name)
        return package.version
    except pkg_resources.DistributionNotFound:
        return None

# Usage example
package_name = "litellm"
version = get_package_version(package_name)
if version:
    print_verbose(f"The version of {package_name} is {version}")
else:
    print_verbose(f"{package_name} is not installed")

import yaml
import dotenv
from typing import Optional
dotenv.load_dotenv()  # load env variables

def set_callbacks():
    ## LOGGING
    if len(os.getenv("SET_VERBOSE", "")) > 0:
        if os.getenv("SET_VERBOSE") == "True":
            litellm.set_verbose = True
            print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
        else:
            litellm.set_verbose = False

    ### LANGFUSE
    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
        litellm.success_callback = ["langfuse"]
        print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")

    ## CACHING
    ### REDIS
    # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
    #     print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
    #     from litellm.caching import Cache
    #     litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
    #     print("\033[92mLiteLLM: Switched on Redis caching\033[0m")

def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
    config = {}
    server_settings = {}
    try:
        if os.path.exists(config_file_path):  # type: ignore
            with open(config_file_path, 'r') as file:  # type: ignore
                config = yaml.safe_load(file)
        else:
            pass
    except:
        pass

    ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
    server_settings = config.get("server_settings", None)
    if server_settings:
        server_settings = server_settings

    ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
    litellm_settings = config.get('litellm_settings', None)
    if litellm_settings:
        for key, value in litellm_settings.items():
            setattr(litellm, key, value)

    ## MODEL LIST
    model_list = config.get('model_list', None)
    if model_list:
        router = litellm.Router(model_list=model_list)

    ## ENVIRONMENT VARIABLES
    environment_variables = config.get('environment_variables', None)
    if environment_variables:
        for key, value in environment_variables.items():
            os.environ[key] = value

    return router, model_list, server_settings