(feat) proxy added tests

Author: ishaan-jaff, 2023-10-21 12:33:35 -07:00
parent 2bd9b4acd8
commit e5e82c7474
5 changed files with 116 additions and 311 deletions


@@ -1,237 +0,0 @@
{
  "openapi": "3.0.0",
  "info": {
    "version": "1.0.0",
    "title": "LiteLLM API",
    "description": "API for LiteLLM"
  },
  "paths": {
    "/chat/completions": {
      "post": {
        "summary": "Create chat completion for 100+ LLM APIs",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "model": {
                    "type": "string",
                    "description": "ID of the model to use"
                  },
                  "messages": {
                    "type": "array",
                    "items": {
                      "type": "object",
                      "properties": {
                        "role": {
                          "type": "string",
                          "description": "The role of the message's author"
                        },
                        "content": {
                          "type": "string",
                          "description": "The contents of the message"
                        },
                        "name": {
                          "type": "string",
                          "description": "The name of the author of the message"
                        },
                        "function_call": {
                          "type": "object",
                          "description": "The name and arguments of a function that should be called"
                        }
                      }
                    }
                  },
                  "functions": {
                    "type": "array",
                    "items": {
                      "type": "object",
                      "properties": {
                        "name": {
                          "type": "string",
                          "description": "The name of the function to be called"
                        },
                        "description": {
                          "type": "string",
                          "description": "A description explaining what the function does"
                        },
                        "parameters": {
                          "type": "object",
                          "description": "The parameters that the function accepts"
                        },
                        "function_call": {
                          "type": "string",
                          "description": "Controls how the model responds to function calls"
                        }
                      }
                    }
                  },
                  "temperature": {
                    "type": "number",
                    "description": "The sampling temperature to be used"
                  },
                  "top_p": {
                    "type": "number",
                    "description": "An alternative to sampling with temperature"
                  },
                  "n": {
                    "type": "integer",
                    "description": "The number of chat completion choices to generate for each input message"
                  },
                  "stream": {
                    "type": "boolean",
                    "description": "If set to true, it sends partial message deltas"
                  },
                  "stop": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Up to 4 sequences where the API will stop generating further tokens"
                  },
                  "max_tokens": {
                    "type": "integer",
                    "description": "The maximum number of tokens to generate in the chat completion"
                  },
                  "presence_penalty": {
                    "type": "number",
                    "description": "It is used to penalize new tokens based on their existence in the text so far"
                  },
                  "frequency_penalty": {
                    "type": "number",
                    "description": "It is used to penalize new tokens based on their frequency in the text so far"
                  },
                  "logit_bias": {
                    "type": "object",
                    "description": "Used to modify the probability of specific tokens appearing in the completion"
                  },
                  "user": {
                    "type": "string",
                    "description": "A unique identifier representing your end-user"
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "choices": {
                      "type": "array",
                      "items": {
                        "type": "object",
                        "properties": {
                          "finish_reason": {
                            "type": "string"
                          },
                          "index": {
                            "type": "integer"
                          },
                          "message": {
                            "type": "object",
                            "properties": {
                              "role": {
                                "type": "string"
                              },
                              "content": {
                                "type": "string"
                              }
                            }
                          }
                        }
                      }
                    },
                    "created": {
                      "type": "string"
                    },
                    "model": {
                      "type": "string"
                    },
                    "usage": {
                      "type": "object",
                      "properties": {
                        "prompt_tokens": {
                          "type": "integer"
                        },
                        "completion_tokens": {
                          "type": "integer"
                        },
                        "total_tokens": {
                          "type": "integer"
                        }
                      }
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/completions": {
      "post": {
        "summary": "Create completion",
        "responses": {
          "200": {
            "description": "Successful operation"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/models": {
      "get": {
        "summary": "Get models",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    },
    "/ollama_logs": {
      "get": {
        "summary": "Retrieve server logs for ollama models",
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/octet-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary"
                }
              }
            }
          }
        }
      }
    },
    "/": {
      "get": {
        "summary": "Home",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    }
  }
}
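To make the removed spec above concrete, here is a minimal sketch of a request matching the /chat/completions schema. It assumes the proxy is running locally on port 8000 and that the requests library is installed; the model id is only an example, not something the spec prescribes.

# Illustrative request against the /chat/completions route described by the spec above.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/chat/completions",
    json={
        "model": "gpt-3.5-turbo",  # example model id
        "messages": [{"role": "user", "content": "hello"}],
        "max_tokens": 10,
    },
)
# Per the 200 response schema, the body should contain choices, created, model, and usage.
print(resp.json())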


@@ -1,74 +0,0 @@
import litellm
from fastapi import FastAPI, Request
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()

origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

#### API ENDPOINTS ####
@router.post("/v1/models")
@router.get("/models")  # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )

# for streaming
def data_generator(response):
    print("inside generator")
    for chunk in response:
        print(f"returned chunk: {chunk}")
        yield f"data: {json.dumps(chunk)}\n\n"

@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.get("/")
async def home(request: Request):
    return "LiteLLM: RUNNING"

app.include_router(router)
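
For context, a minimal sketch of serving the removed proxy app locally. The module name proxy_app is a placeholder rather than the repository's actual filename, and uvicorn is assumed to be installed.

# Hypothetical launcher; "proxy_app" is a placeholder module name for the file above.
import uvicorn

from proxy_app import app  # assumes the app above is saved as proxy_app.py

if __name__ == "__main__":
    # Serve on the host/port that the test scripts below point at.
    uvicorn.run(app, host="127.0.0.1", port=8000)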


@@ -0,0 +1,39 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
print("making request")
openai.api_key = "anything"  # this gets passed as a header

response = openai.ChatCompletion.create(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_region_name="us-west-2",
    max_tokens=10,
)
print(response)

# response = openai.ChatCompletion.create(
#     model="gpt-3.5-turbo",
#     messages=[
#         {
#             "role": "user",
#             "content": "this is a test message, what model / llm are you"
#         }
#     ],
#     max_tokens=10,
#     stream=True,
# )
# for chunk in response:
#     print(chunk)
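
For parity with the other two test scripts, a sketch of the streaming case for the same bedrock model (the commented-out block above only covers gpt-3.5-turbo). The empty AWS credentials are placeholders, exactly as in the original test.

# Hypothetical streaming variant, mirroring the streaming blocks in the other test scripts.
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "anything"  # passed through as a header by the proxy

response = openai.ChatCompletion.create(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"role": "user", "content": "this is a test message, what model / llm are you"}],
    aws_access_key_id="",       # placeholder, as in the original test
    aws_secret_access_key="",   # placeholder
    aws_region_name="us-west-2",
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)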


@@ -0,0 +1,39 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"
print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
)
print(response)

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)


@@ -0,0 +1,38 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"
print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model="openrouter/google/palm-2-chat-bison",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
)
print(response)

response = openai.ChatCompletion.create(
    model="openrouter/google/palm-2-chat-bison",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)
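
Beyond the chat completion scripts in this commit, a hedged sketch of a similar check against the proxy's /models route (defined in the removed proxy file above). It assumes the proxy is running locally and that the requests library is available; it is illustrative, not one of the committed files.

# Illustrative check: hit /models and verify the OpenAI-style list shape returned by model_list().
import requests

resp = requests.get("http://127.0.0.1:8000/models")
body = resp.json()
assert resp.status_code == 200
assert body["object"] == "list"
for entry in body["data"]:
    print(entry["id"])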