(feat) proxy added tests

2023-10-21 12:33:35 -07:00 · 2023-10-21 12:33:35 -07:00 · e5e82c7474
commit e5e82c7474
parent 2bd9b4acd8
5 changed files with 116 additions and 311 deletions
--- a/litellm-proxy/openapi.json
+++ b/litellm-proxy/openapi.json
@ -1,237 +0,0 @@
 {
  "openapi": "3.0.0",
  "info": {
    "version": "1.0.0",
    "title": "LiteLLM API",
    "description": "API for LiteLLM"
  },
  "paths": {
   "/chat/completions": {
  "post": {
    "summary": "Create chat completion for 100+ LLM APIs",
    "requestBody": {
      "required": true,
      "content": {
        "application/json": {
          "schema": {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "ID of the model to use"
              },
              "messages": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "role": {
                      "type": "string",
                      "description": "The role of the message's author"
                    },
                    "content": {
                      "type": "string",
                      "description": "The contents of the message"
                    },
                    "name": {
                      "type": "string",
                      "description": "The name of the author of the message"
                    },
                    "function_call": {
                      "type": "object",
                      "description": "The name and arguments of a function that should be called"
                    }
                  }
                }
              },
              "functions": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "name": {
                      "type": "string",
                      "description": "The name of the function to be called"
                    },
                    "description": {
                      "type": "string",
                      "description": "A description explaining what the function does"
                    },
                    "parameters": {
                      "type": "object",
                      "description": "The parameters that the function accepts"
                    },
                    "function_call": {
                      "type": "string",
                      "description": "Controls how the model responds to function calls"
                    }
                  }
                }
              },
              "temperature": {
                "type": "number",
                "description": "The sampling temperature to be used"
              },
              "top_p": {
                "type": "number",
                "description": "An alternative to sampling with temperature"
              },
              "n": {
                "type": "integer",
                "description": "The number of chat completion choices to generate for each input message"
              },
              "stream": {
                "type": "boolean",
                "description": "If set to true, it sends partial message deltas"
              },
              "stop": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "Up to 4 sequences where the API will stop generating further tokens"
              },
              "max_tokens": {
                "type": "integer",
                "description": "The maximum number of tokens to generate in the chat completion"
              },
              "presence_penalty": {
                "type": "number",
                "description": "It is used to penalize new tokens based on their existence in the text so far"
              },
              "frequency_penalty": {
                "type": "number",
                "description": "It is used to penalize new tokens based on their frequency in the text so far"
              },
              "logit_bias": {
                "type": "object",
                "description": "Used to modify the probability of specific tokens appearing in the completion"
              },
              "user": {
                "type": "string",
                "description": "A unique identifier representing your end-user"
              }
            }
          }
        }
      }
    },
    "responses": {
      "200": {
        "description": "Successful operation",
        "content": {
          "application/json": {
            "schema": {
              "type": "object",
              "properties": {
                "choices": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "properties": {
                      "finish_reason": {
                        "type": "string"
                      },
                      "index": {
                        "type": "integer"
                      },
                      "message": {
                        "type": "object",
                        "properties": {
                          "role": {
                            "type": "string"
                          },
                          "content": {
                            "type": "string"
                          }
                        }
                      }
                    }
                  }
                },
                "created": {
                  "type": "string"
                },
                "model": {
                  "type": "string"
                },
                "usage": {
                  "type": "object",
                  "properties": {
                    "prompt_tokens": {
                      "type": "integer"
                    },
                    "completion_tokens": {
                      "type": "integer"
                    },
                    "total_tokens": {
                      "type": "integer"
                    }
                  }
                }
              }
            }
          }
        }
      },
      "500": {
        "description": "Server error"
      }
    }
  }
 },
    "/completions": {
      "post": {
        "summary": "Create completion",
        "responses": {
          "200": {
            "description": "Successful operation"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/models": {
      "get": {
        "summary": "Get models",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    },
    "/ollama_logs": {
      "get": {
        "summary": "Retrieve server logs for ollama models",
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/octet-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary"
                }
              }
            }
          }
        }
      }
    },
    "/": {
      "get": {
        "summary": "Home",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    }
  }
 }
--- a/litellm-proxy/proxy_server.py
+++ b/litellm-proxy/proxy_server.py
@ -1,74 +0,0 @@
 import litellm
 from fastapi import FastAPI, Request
 from fastapi.routing import APIRouter
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 import json
 # Application object; the interactive docs are mounted at the root path ("/").
 app = FastAPI(docs_url="/", title="LiteLLM API")
 # Endpoints are declared on this router and attached to `app` via include_router.
 router = APIRouter()
 # CORS: any origin, any method and any header are accepted, and credentialed
 # requests are allowed.
 # NOTE(review): wildcard origins together with allow_credentials=True is very
 # permissive - confirm this is intended outside local development.
 origins = ["*"]
 app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 #### API ENDPOINTS ####
@router.get("/v1/models")  # OpenAI-compatible clients list models with GET
@router.post("/v1/models")  # kept so existing POST callers keep working
@router.get("/models")  # if project requires model list
def model_list():
    """Return every model litellm reports as valid, as an OpenAI-style list.

    ``GET /v1/models`` is registered in addition to the original ``POST``
    route because listing models is a GET operation in the OpenAI API; the
    POST route is retained for backward compatibility.

    Returns:
        dict: ``{"data": [...], "object": "list"}`` with one entry in
        ``data`` per id returned by ``litellm.utils.get_valid_models()``.
    """
    # "created" and "owned_by" are fixed placeholder values, identical for
    # every entry; only "id" varies per model.
    return {
        "data": [
            {
                "id": model_id,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model_id in litellm.utils.get_valid_models()
        ],
        "object": "list",
    }
 # for streaming
def data_generator(response):
    """Yield each chunk of ``response`` framed as a ``data:`` event string.

    Every chunk is JSON-encoded and emitted as ``data: <json>`` followed by
    a blank line. Progress messages are printed to stdout while the
    generator is consumed.
    """
    print("inside generator")
    for piece in response:
        print(f"returned chunk: {piece}")
        encoded = json.dumps(piece)
        yield f"data: {encoded}\n\n"
@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    """Pass the JSON request body to ``litellm.completion`` and relay the result.

    The parsed body is forwarded as keyword arguments. When the body's
    ``"stream"`` value equals ``True`` the result is wrapped in a
    ``text/event-stream`` ``StreamingResponse`` fed by ``data_generator``;
    otherwise the result is returned unchanged.
    """
    payload = await request.json()
    result = litellm.completion(**payload)
    # Equality with True, not truthiness: a missing key or a value such as
    # "false" or 2 does not enable streaming.
    wants_stream = payload.get("stream") == True
    if not wants_stream:
        return result
    return StreamingResponse(data_generator(result), media_type="text/event-stream")
@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
    """Pass the JSON request body to ``litellm.completion`` and relay the result.

    The parsed body is forwarded as keyword arguments. When the body's
    ``"stream"`` value equals ``True`` the result is wrapped in a
    ``text/event-stream`` ``StreamingResponse`` fed by ``data_generator``;
    otherwise the result is returned unchanged.
    """
    payload = await request.json()
    result = litellm.completion(**payload)
    # Equality with True, not truthiness: a missing key or a value such as
    # "false" or 2 does not enable streaming.
    wants_stream = payload.get("stream") == True
    if not wants_stream:
        return result
    return StreamingResponse(data_generator(result), media_type="text/event-stream")
@router.get("/")
async def home(request: Request):
    """Return a fixed status string for the root path; ``request`` is unused."""
    # NOTE(review): the app is created with docs_url="/" as well, so this
    # route may be shadowed by the docs page - confirm which one is served.
    status_message = "LiteLLM: RUNNING"
    return status_message
 # Attach every route declared on `router` to the application.
 app.include_router(router)
--- a/litellm-proxy/tests/test_bedrock.py
+++ b/litellm-proxy/tests/test_bedrock.py
@ -0,0 +1,39 @@
 import openai
 # Manual smoke test: send one Bedrock chat request to the server listening
 # on 127.0.0.1:8000 and print whatever comes back.
 openai.api_base = "http://127.0.0.1:8000"
 openai.api_key = "anything" # this gets passed as a header
 print("making request")
 prompt_messages = [
    {
        "role": "user",
        "content": "this is a test message, what model / llm are you"
    }
 ]
 # The AWS credentials are blank in this script.
 # NOTE(review): presumably they are meant to be filled in locally - confirm.
 response = openai.ChatCompletion.create(
    model="bedrock/anthropic.claude-instant-v1",
    messages=prompt_messages,
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_region_name="us-west-2",
    max_tokens=10,
 )
 print(response)
 # response = openai.ChatCompletion.create(
 #     model = "gpt-3.5-turbo",
 #     messages = [
 #         {
 #             "role": "user",
 #             "content": "this is a test message, what model / llm are you"
 #         }
 #     ],
 #     max_tokens = 10,
 #     stream=True
 # )
 # for chunk in response:
 #     print(chunk)
--- a/litellm-proxy/tests/test_openai.py
+++ b/litellm-proxy/tests/test_openai.py
@ -0,0 +1,39 @@
 import openai
 # Manual smoke test: issue one plain and one streaming chat request for
 # gpt-3.5-turbo against the server listening on 127.0.0.1:8000.
 openai.api_base = "http://127.0.0.1:8000"
 openai.api_key = "this can be anything"
 print("making request")
 api_key = ""
 # Arguments shared by both requests; only `stream` differs between them.
 request_kwargs = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    "api_key": api_key,
    "max_tokens": 10,
 }
 # 1) non-streaming: print the whole response object
 response = openai.ChatCompletion.create(**request_kwargs)
 print(response)
 # 2) streaming: print each chunk as it is received
 response = openai.ChatCompletion.create(**request_kwargs, stream=True)
 for part in response:
    print(part)
--- a/litellm-proxy/tests/test_openrouter.py
+++ b/litellm-proxy/tests/test_openrouter.py
@ -0,0 +1,38 @@
 import openai
 # Manual smoke test: issue one plain and one streaming chat request for an
 # OpenRouter model against the server listening on 127.0.0.1:8000.
 openai.api_base = "http://127.0.0.1:8000"
 openai.api_key = "this can be anything"
 print("making request")
 api_key = ""
 # Arguments shared by both requests; only `stream` differs between them.
 request_kwargs = {
    "model": "openrouter/google/palm-2-chat-bison",
    "messages": [
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    "api_key": api_key,
    "max_tokens": 10,
 }
 # 1) non-streaming: print the whole response object
 response = openai.ChatCompletion.create(**request_kwargs)
 print(response)
 # 2) streaming: print each chunk as it is received
 response = openai.ChatCompletion.create(**request_kwargs, stream=True)
 for part in response:
    print(part)