forked from phoenix/litellm-mirror
refactor(openai_proxy --> litellm_server): renaming project for simplicity
parent f6be642f2f, commit 16f39ec840
15 changed files with 1 addition and 1 deletion
40  litellm_server/.env.template  Normal file
@@ -0,0 +1,40 @@
# set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys

OPENAI_API_KEY = ""

HUGGINGFACE_API_KEY=""

TOGETHERAI_API_KEY=""

REPLICATE_API_KEY=""

## bedrock / sagemaker
AWS_ACCESS_KEY_ID = ""
AWS_SECRET_ACCESS_KEY = ""

AZURE_API_KEY = ""
AZURE_API_BASE = ""
AZURE_API_VERSION = ""

ANTHROPIC_API_KEY = ""

COHERE_API_KEY = ""

## LOGGING ##

SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs

### LANGFUSE
LANGFUSE_PUBLIC_KEY = ""
LANGFUSE_SECRET_KEY = ""
# Optional, defaults to https://cloud.langfuse.com
LANGFUSE_HOST = "" # optional


## CACHING ##

### REDIS
REDIS_HOST = ""
REDIS_PORT = ""
REDIS_PASSWORD = ""
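Note: with AUTH_STRATEGY = "DYNAMIC", the server ignores the provider keys above and instead pulls the key from each request's Authorization header (the handling lives in main.py further down). A minimal sketch, assuming the server is already running on port 8000 and ANTHROPIC_API_KEY is exported in the calling shell:

```shell
# The server strips the "Bearer" prefix and forwards the key to litellm as api_key.
curl http://0.0.0.0:8000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $ANTHROPIC_API_KEY" \
  -d '{
    "model": "claude-instant-1",
    "messages": [{"role": "user", "content": "Say this is a test!"}]
  }'
```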
10  litellm_server/Dockerfile  Normal file
@@ -0,0 +1,10 @@
FROM python:3.10

ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt

EXPOSE $PORT

CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT
63  litellm_server/README.md  Normal file
@@ -0,0 +1,63 @@
# litellm-server

A simple, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.

<p align="center" style="margin: 2%">
  <a href="https://l.linklyhq.com/l/1uHsr" target="_blank">
    <img src="https://render.com/images/deploy-to-render-button.svg" width="173"/>
  </a>
  <a href="https://l.linklyhq.com/l/1uHtX" target="_blank">
    <img src="https://deploy.cloud.run/button.svg" width="200"/>
  </a>
</p>

## Usage

```shell
docker run -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest

# UVICORN: OpenAI Proxy running on http://0.0.0.0:8000
```

## Endpoints:
- `/chat/completions` - chat completions endpoint to call 100+ LLMs
- `/router/completions` - for multiple deployments of the same model (e.g. Azure OpenAI), uses the least used deployment. [Learn more](https://docs.litellm.ai/docs/routing)
- `/models` - available models on server

## Making Requests to Proxy
### Curl

**Call OpenAI**
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
```
**Call Bedrock**
```shell
curl http://0.0.0.0:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "bedrock/anthropic.claude-instant-v1",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
```

### Running Locally
```shell
$ git clone https://github.com/BerriAI/litellm.git
```
```shell
$ cd ./litellm/openai-proxy
```

```shell
$ uvicorn main:app --host 0.0.0.0 --port 8000
```

[**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/simple_proxy)
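Note: the Endpoints list above mentions `/models`, but the README gives no example for it. A minimal sketch, assuming the server is running locally on port 8000:

```shell
# Returns an OpenAI-style {"object": "list", "data": [...]} payload built from
# litellm.utils.get_valid_models() (see main.py below).
curl http://0.0.0.0:8000/models
```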
2  litellm_server/__init__.py  Normal file
@@ -0,0 +1,2 @@
from .main import *
from .utils import *
0  litellm_server/config  Normal file
160  litellm_server/main.py  Normal file
@@ -0,0 +1,160 @@
import litellm, os, traceback
from fastapi import FastAPI, Request, HTTPException
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json
import os
from typing import Optional
try:
    from utils import set_callbacks, load_router_config
except ImportError:
    from openai_proxy.utils import set_callbacks, load_router_config
import dotenv
dotenv.load_dotenv() # load env variables

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()
origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
#### GLOBAL VARIABLES ####
llm_router: Optional[litellm.Router] = None

set_callbacks() # sets litellm callbacks for logging if they exist in the environment
llm_router = load_router_config(router=llm_router)
#### API ENDPOINTS ####
@router.post("/v1/models")
@router.get("/models") # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )
# for streaming
def data_generator(response):
    print("inside generator")
    for chunk in response:
        print(f"returned chunk: {chunk}")
        yield f"data: {json.dumps(chunk)}\n\n"

@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.post("/v1/embeddings")
@router.post("/embeddings")
async def embedding(request: Request):
    try:
        data = await request.json()
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()
            if len(api_key.strip()) > 0:
                api_key = api_key
                data["api_key"] = api_key
        response = litellm.embedding(
            **data
        )
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
    try:
        data = await request.json()
        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
            api_key = request.headers.get("authorization")
            api_key = api_key.replace("Bearer", "").strip()
            if len(api_key.strip()) > 0:
                api_key = api_key
                data["api_key"] = api_key
        response = litellm.completion(
            **data
        )
        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}
        # raise HTTPException(status_code=500, detail=error_msg)

@router.post("/router/completions")
async def router_completion(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")

        # openai.ChatCompletion.create replacement
        response = await llm_router.acompletion(model="gpt-3.5-turbo",
                        messages=[{"role": "user", "content": "Hey, how's it going?"}])

        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.post("/router/embedding")
async def router_embedding(request: Request):
    global llm_router
    try:
        data = await request.json()
        if "model_list" in data:
            llm_router = litellm.Router(model_list=data.pop("model_list"))
        if llm_router is None:
            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")

        response = await llm_router.aembedding(model="gpt-3.5-turbo",
                        messages=[{"role": "user", "content": "Hey, how's it going?"}])

        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
            return StreamingResponse(data_generator(response), media_type='text/event-stream')
        return response
    except Exception as e:
        error_traceback = traceback.format_exc()
        error_msg = f"{str(e)}\n\n{error_traceback}"
        return {"error": error_msg}

@router.get("/")
async def home(request: Request):
    return "LiteLLM: RUNNING"


app.include_router(router)
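Note on streaming: when a request body sets "stream": true, the handlers above wrap the litellm generator in a StreamingResponse whose body is a stream of server-sent events, each line formatted as `data: <json chunk>` by data_generator(). A minimal sketch of watching that stream with curl, assuming a locally running server with an OpenAI key configured via the ENV strategy:

```shell
# -N turns off curl's buffering so each "data: ..." event prints as it arrives.
curl -N http://0.0.0.0:8000/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "stream": true
  }'
```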
242  litellm_server/openapi.json  Normal file
@@ -0,0 +1,242 @@
{
  "openapi": "3.0.0",
  "info": {
    "version": "1.0.0",
    "title": "LiteLLM API",
    "description": "API for LiteLLM"
  },
  "paths": {
    "/chat/completions": {
      "post": {
        "summary": "Create chat completion for 100+ LLM APIs",
        "requestBody": {
          "description": "Input parameters for chat completions",
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ChatCompletionsRequest"
              },
              "example": {
                "model": "gpt-3.5-turbo",
                "messages": [
                  {
                    "role": "system",
                    "content": "You are a helpful assistant."
                  },
                  {
                    "role": "user",
                    "content": "this is a test message from litellm proxy, can you ack"
                  }
                ],
                "frequency_penalty": 0.0,
                "max_tokens": 500,
                "n": 1,
                "presence_penalty": 0.0,
                "stop": "###",
                "stream": false,
                "temperature": 0.7,
                "top_p": 0.8,
                "user": "test-litellm"
              }

            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionsResponse"
                },
                "example": {
                  "object": "chat.completion",
                  "id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
                  "choices": [
                    {
                      "finish_reason": "stop_sequence",
                      "index": 0,
                      "message": {
                        "content": "I'm a large language model trained by OpenAI, ACK receiving this message",
                        "role": "assistant"
                      }
                    }
                  ],
                  "created": 1698253693.169062,
                  "model": "gpt-3.5-turbo",
                  "usage": {
                    "prompt_tokens": 14,
                    "completion_tokens": 102,
                    "total_tokens": 116
                  }
                }

              }
            }
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/models": {
      "get": {
        "summary": "Get models",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    },
    "/": {
      "get": {
        "summary": "Swagger docs",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "ChatCompletionsRequest": {
        "type": "object",
        "properties": {
          "messages": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "role": {
                  "type": "string"
                },
                "content": {
                  "type": "string"
                }
              },
              "required": ["role", "content"]
            }
          },
          "model": {
            "type": "string"
          },
          "frequency_penalty": {
            "type": "number"
          },
          "function_call": {
            "type": ["string", "object"]
          },
          "functions": {
            "type": "array"
          },
          "logit_bias": {
            "type": "object"
          },
          "max_tokens": {
            "type": "integer"
          },
          "n": {
            "type": "integer"
          },
          "presence_penalty": {
            "type": "number"
          },
          "stop": {
            "oneOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                }
              }
            ]
          },
          "stream": {
            "type": "boolean"
          },
          "temperature": {
            "type": "number"
          },
          "top_p": {
            "type": "number"
          },
          "user": {
            "type": "string"
          }
        },
        "required": ["messages", "model"]
      },
      "ChatCompletionsResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string"
          },
          "choices": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "finish_reason": {
                  "type": "string"
                },
                "index": {
                  "type": "integer"
                },
                "message": {
                  "type": "object",
                  "properties": {
                    "content": {
                      "type": "string"
                    },
                    "role": {
                      "type": "string"
                    }
                  },
                  "required": ["content", "role"]
                },
                "usage": {
                  "type": "object",
                  "properties": {
                    "prompt_tokens": {
                      "type": "integer"
                    },
                    "completion_tokens": {
                      "type": "integer"
                    },
                    "total_tokens": {
                      "type": "integer"
                    }
                  },
                  "required": ["prompt_tokens", "completion_tokens", "total_tokens"]
                }
              },
              "required": ["finish_reason", "index", "message", "usage"]
            }
          },
          "id": {
            "type": "string"
          },
          "created": {
            "type": "number"
          },
          "model": {
            "type": "string"
          }
        },
        "required": ["object", "choices", "id", "created", "model"]
      }

    }
  }
}
7  litellm_server/requirements.txt  Normal file
@@ -0,0 +1,7 @@
openai
fastapi
uvicorn
boto3
litellm
python-dotenv
redis
39  litellm_server/tests/test_bedrock.py  Normal file
@@ -0,0 +1,39 @@
import openai
openai.api_base = "http://0.0.0.0:8000"
print("making request")
openai.api_key = "anything" # this gets passed as a header


response = openai.ChatCompletion.create(
    model = "bedrock/anthropic.claude-instant-v1",
    messages = [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_region_name="us-west-2",
    max_tokens = 10,
)


print(response)


# response = openai.ChatCompletion.create(
#     model = "gpt-3.5-turbo",
#     messages = [
#         {
#             "role": "user",
#             "content": "this is a test message, what model / llm are you"
#         }
#     ],
#     max_tokens = 10,
#     stream=True
# )


# for chunk in response:
#     print(chunk)
80  litellm_server/tests/test_caching.py  Normal file
@@ -0,0 +1,80 @@
import openai, os, dotenv, traceback, time
openai.api_base = "http://0.0.0.0:8000"
dotenv.load_dotenv()
openai.api_key = os.getenv("ANTHROPIC_API_KEY") # this gets passed as a header


response1 = openai.ChatCompletion.create(
    model = "claude-instant-1",
    messages = [
        {
            "role": "user",
            "content": "write a short poem about litellm"
        }
    ],
)

try:
    print(f"response: {response1['choices'][0]['message']['content']}")
except:
    print(f"response: {response1}")

time.sleep(1) # allow time for request to be stored

response2 = openai.ChatCompletion.create(
    model = "claude-instant-1",
    messages = [
        {
            "role": "user",
            "content": "write a short poem about litellm"
        }
    ],
)

try:
    print(f"response: {response2['choices'][0]['message']['content']}")
except:
    print(f"response: {response2}")

openai.api_key = os.getenv("OPENAI_API_KEY")

try:
    response3 = openai.ChatCompletion.create(
        model = "gpt-3.5-turbo",
        messages = [
            {
                "role": "user",
                "content": "write a short poem about litellm"
            }
        ],
    )
except Exception as e:
    traceback.print_exc()

try:
    print(f"response: {response3['choices'][0]['message']['content']}")
except:
    print(f"response: {response3}")

openai.api_key = os.getenv("ANTHROPIC_API_KEY") # this gets passed as a header
# switch caching off using cache flag
response4 = openai.ChatCompletion.create(
    model = "claude-instant-1",
    messages = [
        {
            "role": "user",
            "content": "write a short poem about litellm"
        }
    ],
    caching = False,
)

try:
    print(f"response: {response4['choices'][0]['message']['content']}")
except:
    print(f"response: {response4}")

assert response1["choices"][0]["message"]["content"] == response2["choices"][0]["message"]["content"]
assert response1["choices"][0]["message"]["content"] != response4["choices"][0]["message"]["content"]

assert response1["choices"][0]["message"]["content"] != response3["choices"][0]["message"]["content"]
39  litellm_server/tests/test_openai.py  Normal file
@@ -0,0 +1,39 @@
import openai
openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"

print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model = "gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens = 10,
)


print(response)


response = openai.ChatCompletion.create(
    model = "gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens = 10,
    stream=True
)


for chunk in response:
    print(chunk)
38  litellm_server/tests/test_openrouter.py  Normal file
@@ -0,0 +1,38 @@
import openai
openai.api_base = "http://0.0.0.0:8000"
openai.api_key = "this can be anything"
print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model = "openrouter/google/palm-2-chat-bison",
    messages = [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens = 10,
)


print(response)


response = openai.ChatCompletion.create(
    model = "openrouter/google/palm-2-chat-bison",
    messages = [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens = 10,
    stream=True
)


for chunk in response:
    print(chunk)
59  litellm_server/tests/test_router.py  Normal file
@@ -0,0 +1,59 @@
#### What this tests ####
# This tests calling batch_completions by running 100 messages together

import sys, os
import traceback, asyncio
import pytest
from fastapi.testclient import TestClient
from fastapi import Request
sys.path.insert(
    0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from openai_proxy import app


def test_router_completion():
    client = TestClient(app)
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        "model_list": [{ # list of model deployments
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE")
            },
            "tpm": 240000,
            "rpm": 1800
        }, {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/chatgpt-functioncalling",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE")
            },
            "tpm": 240000,
            "rpm": 1800
        }, {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 1000000,
            "rpm": 9000
        }]
    }

    response = client.post("/router/completions", json=data)
    print(f"response: {response.text}")
    assert response.status_code == 200

    response_data = response.json()
    # Perform assertions on the response data
    assert isinstance(response_data['choices'][0]['message']['content'], str)

test_router_completion()
70  litellm_server/utils.py  Normal file
@@ -0,0 +1,70 @@
import os, litellm
import pkg_resources

def get_package_version(package_name):
    try:
        package = pkg_resources.get_distribution(package_name)
        return package.version
    except pkg_resources.DistributionNotFound:
        return None

# Usage example
package_name = "litellm"
version = get_package_version(package_name)
if version:
    print(f"The version of {package_name} is {version}")
else:
    print(f"{package_name} is not installed")
import yaml
import dotenv
from typing import Optional
dotenv.load_dotenv() # load env variables

def set_callbacks():
    ## LOGGING
    if len(os.getenv("SET_VERBOSE", "")) > 0:
        if os.getenv("SET_VERBOSE") == "True":
            litellm.set_verbose = True
            print("\033[92mLiteLLM: Switched on verbose logging\033[0m")
        else:
            litellm.set_verbose = False

    ### LANGFUSE
    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
        litellm.success_callback = ["langfuse"]
        print("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")

    ## CACHING
    ### REDIS
    if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
        from litellm.caching import Cache
        litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
        print("\033[92mLiteLLM: Switched on Redis caching\033[0m")



def load_router_config(router: Optional[litellm.Router]):
    config = {}
    config_file = '/app/config.yaml'

    try:
        if os.path.exists(config_file):
            with open(config_file, 'r') as file:
                config = yaml.safe_load(file)
        else:
            print(f"Config file '{config_file}' not found.")
    except:
        print(f"Config file '{config_file}' not found.")

    ## MODEL LIST
    model_list = config.get('model_list', None)
    if model_list:
        router = litellm.Router(model_list=model_list)

    ## ENVIRONMENT VARIABLES
    environment_variables = config.get('environment_variables', None)
    if environment_variables:
        for key, value in environment_variables.items():
            os.environ[key] = value

    return router
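Note: load_router_config() only reads two keys from /app/config.yaml, model_list and environment_variables. A minimal sketch of such a file, written via a shell heredoc; every value is a placeholder, and the deployment fields mirror the ones used in tests/test_router.py:

```shell
cat > config.yaml <<'EOF'
model_list:
  - model_name: gpt-3.5-turbo      # group name requests refer to as "model"
    litellm_params:                # forwarded to litellm completion/embedding calls
      model: azure/chatgpt-v-2     # placeholder deployment
      api_key: ""
      api_version: ""
      api_base: ""
    tpm: 240000
    rpm: 1800
environment_variables:             # copied into os.environ at startup
  REDIS_HOST: ""
  REDIS_PORT: ""
  REDIS_PASSWORD: ""
EOF
```

Because the Dockerfile runs COPY . /app, placing this file next to the Dockerfile before building is one way to get it to the /app/config.yaml path the loader checks.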