fix(litellm_server): commenting out the code

Krrish Dholakia 2023-11-20 15:39:05 -08:00
parent 1976d0f7d6
commit 1ce505cbfb
8 changed files with 290 additions and 656 deletions

@@ -1,43 +1,43 @@
# set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
# # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
# AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
OPENAI_API_KEY = ""
# OPENAI_API_KEY = ""
HUGGINGFACE_API_KEY=""
# HUGGINGFACE_API_KEY=""
TOGETHERAI_API_KEY=""
# TOGETHERAI_API_KEY=""
REPLICATE_API_KEY=""
# REPLICATE_API_KEY=""
## bedrock / sagemaker
AWS_ACCESS_KEY_ID = ""
AWS_SECRET_ACCESS_KEY = ""
# ## bedrock / sagemaker
# AWS_ACCESS_KEY_ID = ""
# AWS_SECRET_ACCESS_KEY = ""
AZURE_API_KEY = ""
AZURE_API_BASE = ""
AZURE_API_VERSION = ""
# AZURE_API_KEY = ""
# AZURE_API_BASE = ""
# AZURE_API_VERSION = ""
ANTHROPIC_API_KEY = ""
# ANTHROPIC_API_KEY = ""
COHERE_API_KEY = ""
# COHERE_API_KEY = ""
## CONFIG FILE ##
# CONFIG_FILE_PATH = "" # uncomment to point to config file
# ## CONFIG FILE ##
# # CONFIG_FILE_PATH = "" # uncomment to point to config file
## LOGGING ##
# ## LOGGING ##
SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
# SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional
# ### LANGFUSE
# LANGFUSE_PUBLIC_KEY = ""
# LANGFUSE_SECRET_KEY = ""
# # Optional, defaults to https://cloud.langfuse.com
# LANGFUSE_HOST = "" # optional
## CACHING ##
# ## CACHING ##
### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
# ### REDIS
# REDIS_HOST = ""
# REDIS_PORT = ""
# REDIS_PASSWORD = ""

@@ -1,10 +1,10 @@
FROM python:3.10
# FROM python:3.10
ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt
# ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
# COPY . /app
# WORKDIR /app
# RUN pip install -r requirements.txt
EXPOSE $PORT
# EXPOSE $PORT
CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
# CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10

@@ -1,124 +1,3 @@
# litellm-server [experimental]
Load balancer for multiple API Deployments (eg. Azure/OpenAI)
<img width="1036" alt="Screenshot 2023-11-06 at 6 54 16 PM" src="https://github.com/BerriAI/litellm/assets/17561003/d32da338-1d72-45bb-bca8-ac70f1d3e980">
LiteLLM Server supports:
- LLM API Calls in the OpenAI ChatCompletions format
- Caching + Logging capabilities (Redis and Langfuse, respectively)
- Setting API keys in the request headers or in the .env
## Usage
```shell
docker run -e PORT=8000 -e OPENAI_API_KEY=<your-openai-key> -p 8000:8000 ghcr.io/berriai/litellm:latest
```
OpenAI Proxy running on http://0.0.0.0:8000
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
[**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/providers)
## Endpoints:
- `/chat/completions` - chat completions endpoint to call 100+ LLMs
- `/models` - available models on server
## Save Model-specific params (API Base, API Keys, Temperature, etc.)
Use the [router_config_template.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) to save model-specific information like api_base, api_key, temperature, max_tokens, etc.
1. Create a `config.yaml` file
```shell
model_list:
- model_name: gpt-3.5-turbo # set model alias
litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
model: azure/chatgpt-v-2 # azure/<your-deployment-name> <- actual name used for litellm.completion()
api_key: your_azure_api_key
api_version: your_azure_api_version
api_base: your_azure_api_base
- model_name: mistral-7b
litellm_params:
model: ollama/mistral
api_base: your_ollama_api_base
```
2. Start the server
```shell
docker run -e PORT=8000 -p 8000:8000 -v $(pwd)/config.yaml:/app/config.yaml ghcr.io/berriai/litellm:latest
```
## Caching
Add Redis Caching to your server via environment variables
```env
### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
```
Docker command:
```shell
docker run -e REDIST_HOST=<your-redis-host> -e REDIS_PORT=<your-redis-port> -e REDIS_PASSWORD=<your-redis-password> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
## Logging
1. Debug Logs
Print the input/output params by setting `SET_VERBOSE = "True"`.
Docker command:
```shell
docker run -e SET_VERBOSE="True" -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
Add Langfuse Logging to your server via environment variables
```env
### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional
```
Docker command:
```shell
docker run -e LANGFUSE_PUBLIC_KEY=<your-public-key> -e LANGFUSE_SECRET_KEY=<your-secret-key> -e LANGFUSE_HOST=<your-langfuse-host> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
```
## Running Locally
```shell
$ git clone https://github.com/BerriAI/litellm.git
```
```shell
$ cd ./litellm/litellm_server
```
```shell
$ uvicorn main:app --host 0.0.0.0 --port 8000
```
### Custom Config
1. Create + Modify [router_config.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) (save your azure/openai/etc. deployment info)
```shell
cp ./router_config_template.yaml ./router_config.yaml
```
2. Build Docker Image
```shell
docker build -t litellm_server . --build-arg CONFIG_FILE=./router_config.yaml
```
3. Run Docker Image
```shell
docker run --name litellm_server -e PORT=8000 -p 8000:8000 litellm_server
```
Deprecated. See litellm/proxy

@@ -1,2 +1,2 @@
from .main import *
from .server_utils import *
# from .main import *
# from .server_utils import *

@@ -1,193 +1,193 @@
import os, traceback
from fastapi import FastAPI, Request, HTTPException
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json, sys
from typing import Optional
sys.path.insert(
0, os.path.abspath("../")
) # Adds the parent directory to the system path - for litellm local dev
import litellm
# import os, traceback
# from fastapi import FastAPI, Request, HTTPException
# from fastapi.routing import APIRouter
# from fastapi.responses import StreamingResponse, FileResponse
# from fastapi.middleware.cors import CORSMiddleware
# import json, sys
# from typing import Optional
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path - for litellm local dev
# import litellm
try:
from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
except ImportError:
from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
import dotenv
dotenv.load_dotenv() # load env variables
# try:
# from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
# except ImportError:
# from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
# import dotenv
# dotenv.load_dotenv() # load env variables
app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]
# app = FastAPI(docs_url="/", title="LiteLLM API")
# router = APIRouter()
# origins = ["*"]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
#### GLOBAL VARIABLES ####
llm_router: Optional[litellm.Router] = None
llm_model_list: Optional[list] = None
server_settings: Optional[dict] = None
# app.add_middleware(
# CORSMiddleware,
# allow_origins=origins,
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
# #### GLOBAL VARIABLES ####
# llm_router: Optional[litellm.Router] = None
# llm_model_list: Optional[list] = None
# server_settings: Optional[dict] = None
set_callbacks() # sets litellm callbacks for logging if they exist in the environment
# set_callbacks() # sets litellm callbacks for logging if they exist in the environment
if "CONFIG_FILE_PATH" in os.environ:
llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
else:
llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
#### API ENDPOINTS ####
@router.get("/v1/models")
@router.get("/models") # if project requires model list
def model_list():
all_models = litellm.utils.get_valid_models()
if llm_model_list:
all_models += llm_model_list
return dict(
data=[
{
"id": model,
"object": "model",
"created": 1677610602,
"owned_by": "openai",
}
for model in all_models
],
object="list",
)
# for streaming
def data_generator(response):
# if "CONFIG_FILE_PATH" in os.environ:
# llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
# else:
# llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
# #### API ENDPOINTS ####
# @router.get("/v1/models")
# @router.get("/models") # if project requires model list
# def model_list():
# all_models = litellm.utils.get_valid_models()
# if llm_model_list:
# all_models += llm_model_list
# return dict(
# data=[
# {
# "id": model,
# "object": "model",
# "created": 1677610602,
# "owned_by": "openai",
# }
# for model in all_models
# ],
# object="list",
# )
# # for streaming
# def data_generator(response):
for chunk in response:
# for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"
# yield f"data: {json.dumps(chunk)}\n\n"
@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
data = await request.json()
response = litellm.completion(
**data
)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
# @router.post("/v1/completions")
# @router.post("/completions")
# async def completion(request: Request):
# data = await request.json()
# response = litellm.completion(
# **data
# )
# if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
# return StreamingResponse(data_generator(response), media_type='text/event-stream')
# return response
@router.post("/v1/embeddings")
@router.post("/embeddings")
async def embedding(request: Request):
try:
data = await request.json()
# default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
api_key = request.headers.get("authorization")
api_key = api_key.replace("Bearer", "").strip() # type: ignore
if len(api_key.strip()) > 0:
api_key = api_key
data["api_key"] = api_key
response = litellm.embedding(
**data
)
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
# @router.post("/v1/embeddings")
# @router.post("/embeddings")
# async def embedding(request: Request):
# try:
# data = await request.json()
# # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
# if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
# api_key = request.headers.get("authorization")
# api_key = api_key.replace("Bearer", "").strip() # type: ignore
# if len(api_key.strip()) > 0:
# api_key = api_key
# data["api_key"] = api_key
# response = litellm.embedding(
# **data
# )
# return response
# except Exception as e:
# error_traceback = traceback.format_exc()
# error_msg = f"{str(e)}\n\n{error_traceback}"
# return {"error": error_msg}
@router.post("/v1/chat/completions")
@router.post("/chat/completions")
@router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
async def chat_completion(request: Request, model: Optional[str] = None):
global llm_model_list, server_settings
try:
data = await request.json()
server_model = server_settings.get("completion_model", None) if server_settings else None
data["model"] = server_model or model or data["model"]
## CHECK KEYS ##
# default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
# env_validation = litellm.validate_environment(model=data["model"])
# if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
# if "authorization" in request.headers:
# api_key = request.headers.get("authorization")
# elif "api-key" in request.headers:
# api_key = request.headers.get("api-key")
# print(f"api_key in headers: {api_key}")
# if " " in api_key:
# api_key = api_key.split(" ")[1]
# print(f"api_key split: {api_key}")
# if len(api_key) > 0:
# api_key = api_key
# data["api_key"] = api_key
# print(f"api_key in data: {api_key}")
## CHECK CONFIG ##
if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
for m in llm_model_list:
if data["model"] == m["model_name"]:
for key, value in m["litellm_params"].items():
data[key] = value
break
response = litellm.completion(
**data
)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
# @router.post("/v1/chat/completions")
# @router.post("/chat/completions")
# @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
# async def chat_completion(request: Request, model: Optional[str] = None):
# global llm_model_list, server_settings
# try:
# data = await request.json()
# server_model = server_settings.get("completion_model", None) if server_settings else None
# data["model"] = server_model or model or data["model"]
# ## CHECK KEYS ##
# # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
# # env_validation = litellm.validate_environment(model=data["model"])
# # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
# # if "authorization" in request.headers:
# # api_key = request.headers.get("authorization")
# # elif "api-key" in request.headers:
# # api_key = request.headers.get("api-key")
# # print(f"api_key in headers: {api_key}")
# # if " " in api_key:
# # api_key = api_key.split(" ")[1]
# # print(f"api_key split: {api_key}")
# # if len(api_key) > 0:
# # api_key = api_key
# # data["api_key"] = api_key
# # print(f"api_key in data: {api_key}")
# ## CHECK CONFIG ##
# if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
# for m in llm_model_list:
# if data["model"] == m["model_name"]:
# for key, value in m["litellm_params"].items():
# data[key] = value
# break
# response = litellm.completion(
# **data
# )
# if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
# return StreamingResponse(data_generator(response), media_type='text/event-stream')
# return response
# except Exception as e:
# error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
# return {"error": error_msg}
raise HTTPException(status_code=500, detail=error_msg)
# error_msg = f"{str(e)}\n\n{error_traceback}"
# # return {"error": error_msg}
# raise HTTPException(status_code=500, detail=error_msg)
@router.post("/router/completions")
async def router_completion(request: Request):
global llm_router
try:
data = await request.json()
if "model_list" in data:
llm_router = litellm.Router(model_list=data.pop("model_list"))
if llm_router is None:
raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
# @router.post("/router/completions")
# async def router_completion(request: Request):
# global llm_router
# try:
# data = await request.json()
# if "model_list" in data:
# llm_router = litellm.Router(model_list=data.pop("model_list"))
# if llm_router is None:
# raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
# openai.ChatCompletion.create replacement
response = await llm_router.acompletion(model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}])
# # openai.ChatCompletion.create replacement
# response = await llm_router.acompletion(model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": "Hey, how's it going?"}])
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
# if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
# return StreamingResponse(data_generator(response), media_type='text/event-stream')
# return response
# except Exception as e:
# error_traceback = traceback.format_exc()
# error_msg = f"{str(e)}\n\n{error_traceback}"
# return {"error": error_msg}
@router.post("/router/embedding")
async def router_embedding(request: Request):
global llm_router
try:
data = await request.json()
if "model_list" in data:
llm_router = litellm.Router(model_list=data.pop("model_list"))
if llm_router is None:
raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
# @router.post("/router/embedding")
# async def router_embedding(request: Request):
# global llm_router
# try:
# data = await request.json()
# if "model_list" in data:
# llm_router = litellm.Router(model_list=data.pop("model_list"))
# if llm_router is None:
# raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
messages=[{"role": "user", "content": "Hey, how's it going?"}])
# response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
# messages=[{"role": "user", "content": "Hey, how's it going?"}])
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
return response
except Exception as e:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"
return {"error": error_msg}
# if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
# return StreamingResponse(data_generator(response), media_type='text/event-stream')
# return response
# except Exception as e:
# error_traceback = traceback.format_exc()
# error_msg = f"{str(e)}\n\n{error_traceback}"
# return {"error": error_msg}
@router.get("/")
async def home(request: Request):
return "LiteLLM: RUNNING"
# @router.get("/")
# async def home(request: Request):
# return "LiteLLM: RUNNING"
app.include_router(router)
# app.include_router(router)

@@ -1,245 +0,0 @@
{
"openapi": "3.0.0",
"info": {
"version": "1.0.0",
"title": "LiteLLM API",
"description": "API for LiteLLM"
},
"paths": {
"/chat/completions": {
"post": {
"summary": "Create chat completion for 100+ LLM APIs",
"requestBody": {
"description": "Input parameters for chat completions",
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsRequest"
},
"example": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "this is a test message from litellm proxy, can you ack"
}
],
"frequency_penalty": 0.0,
"max_tokens": 500,
"n": 1,
"presence_penalty": 0.0,
"stop": "###",
"stream": false,
"temperature": 0.7,
"top_p": 0.8,
"user": "test-litellm"
}
}
}
},
"responses": {
"200": {
"description": "Successful operation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsResponse"
},
"example": {
"object": "chat.completion",
"id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
"choices": [
{
"finish_reason": "stop_sequence",
"index": 0,
"message": {
"content": "I'm a large language model trained by OpenAI, ACK receiving this message",
"role": "assistant"
}
}
],
"created": 1698253693.169062,
"model": "gpt-3.5-turbo",
"usage": {
"prompt_tokens": 14,
"completion_tokens": 102,
"total_tokens": 116
}
}
}
}
},
"500": {
"description": "Server error"
}
}
}
},
"/models": {
"get": {
"summary": "Get models",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
},
"/": {
"get": {
"summary": "Swagger docs",
"responses": {
"200": {
"description": "Successful operation"
}
}
}
}
},
"components": {
"schemas": {
"ChatCompletionsRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": {
"type": "string"
},
"content": {
"type": "string"
}
},
"required": ["role", "content"]
}
},
"model": {
"type": "string"
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"type": ["string", "object"]
},
"functions": {
"type": "array"
},
"logit_bias": {
"type": "object"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
},
"caching": {
"type": "boolean"
}
},
"required": ["messages", "model"]
},
"ChatCompletionsResponse": {
"type": "object",
"properties": {
"object": {
"type": "string"
},
"choices": {
"type": "array",
"items": {
"type": "object",
"properties": {
"finish_reason": {
"type": "string"
},
"index": {
"type": "integer"
},
"message": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"role": {
"type": "string"
}
},
"required": ["content", "role"]
},
"usage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer"
},
"completion_tokens": {
"type": "integer"
},
"total_tokens": {
"type": "integer"
}
},
"required": ["prompt_tokens", "completion_tokens", "total_tokens"]
}
},
"required": ["finish_reason", "index", "message", "usage"]
}
},
"id": {
"type": "string"
},
"created": {
"type": "number"
},
"model": {
"type": "string"
}
},
"required": ["object", "choices", "id", "created", "model"]
}
}
}
}

@@ -1,7 +1,7 @@
openai
fastapi
uvicorn
boto3
litellm
python-dotenv
redis
# openai
# fastapi
# uvicorn
# boto3
# litellm
# python-dotenv
# redis

@@ -1,86 +1,86 @@
import os, litellm
import pkg_resources
import dotenv
dotenv.load_dotenv() # load env variables
# import os, litellm
# import pkg_resources
# import dotenv
# dotenv.load_dotenv() # load env variables
def print_verbose(print_statement):
pass
# def print_verbose(print_statement):
# pass
def get_package_version(package_name):
try:
package = pkg_resources.get_distribution(package_name)
return package.version
except pkg_resources.DistributionNotFound:
return None
# def get_package_version(package_name):
# try:
# package = pkg_resources.get_distribution(package_name)
# return package.version
# except pkg_resources.DistributionNotFound:
# return None
# Usage example
package_name = "litellm"
version = get_package_version(package_name)
if version:
print_verbose(f"The version of {package_name} is {version}")
else:
print_verbose(f"{package_name} is not installed")
import yaml
import dotenv
from typing import Optional
dotenv.load_dotenv() # load env variables
# # Usage example
# package_name = "litellm"
# version = get_package_version(package_name)
# if version:
# print_verbose(f"The version of {package_name} is {version}")
# else:
# print_verbose(f"{package_name} is not installed")
# import yaml
# import dotenv
# from typing import Optional
# dotenv.load_dotenv() # load env variables
def set_callbacks():
## LOGGING
if len(os.getenv("SET_VERBOSE", "")) > 0:
if os.getenv("SET_VERBOSE") == "True":
litellm.set_verbose = True
print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
else:
litellm.set_verbose = False
# def set_callbacks():
# ## LOGGING
# if len(os.getenv("SET_VERBOSE", "")) > 0:
# if os.getenv("SET_VERBOSE") == "True":
# litellm.set_verbose = True
# print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
# else:
# litellm.set_verbose = False
### LANGFUSE
if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
litellm.success_callback = ["langfuse"]
print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
# ### LANGFUSE
# if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
# litellm.success_callback = ["langfuse"]
# print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
## CACHING
### REDIS
# if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
# print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
# from litellm.caching import Cache
# litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
# print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
# ## CACHING
# ### REDIS
# # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
# # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
# # from litellm.caching import Cache
# # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
# # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
config = {}
server_settings = {}
try:
if os.path.exists(config_file_path): # type: ignore
with open(config_file_path, 'r') as file: # type: ignore
config = yaml.safe_load(file)
else:
pass
except:
pass
# def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
# config = {}
# server_settings = {}
# try:
# if os.path.exists(config_file_path): # type: ignore
# with open(config_file_path, 'r') as file: # type: ignore
# config = yaml.safe_load(file)
# else:
# pass
# except:
# pass
## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
server_settings = config.get("server_settings", None)
if server_settings:
server_settings = server_settings
# ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
# server_settings = config.get("server_settings", None)
# if server_settings:
# server_settings = server_settings
## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
litellm_settings = config.get('litellm_settings', None)
if litellm_settings:
for key, value in litellm_settings.items():
setattr(litellm, key, value)
# ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
# litellm_settings = config.get('litellm_settings', None)
# if litellm_settings:
# for key, value in litellm_settings.items():
# setattr(litellm, key, value)
## MODEL LIST
model_list = config.get('model_list', None)
if model_list:
router = litellm.Router(model_list=model_list)
# ## MODEL LIST
# model_list = config.get('model_list', None)
# if model_list:
# router = litellm.Router(model_list=model_list)
## ENVIRONMENT VARIABLES
environment_variables = config.get('environment_variables', None)
if environment_variables:
for key, value in environment_variables.items():
os.environ[key] = value
# ## ENVIRONMENT VARIABLES
# environment_variables = config.get('environment_variables', None)
# if environment_variables:
# for key, value in environment_variables.items():
# os.environ[key] = value
return router, model_list, server_settings
# return router, model_list, server_settings