(feat) proxy added tests

Author: ishaan-jaff, 2023-10-21 12:33:35 -07:00
parent 2bd9b4acd8
commit e5e82c7474
5 changed files with 116 additions and 311 deletions


@@ -1,237 +0,0 @@
{
  "openapi": "3.0.0",
  "info": {
    "version": "1.0.0",
    "title": "LiteLLM API",
    "description": "API for LiteLLM"
  },
  "paths": {
    "/chat/completions": {
      "post": {
        "summary": "Create chat completion for 100+ LLM APIs",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "model": {
                    "type": "string",
                    "description": "ID of the model to use"
                  },
                  "messages": {
                    "type": "array",
                    "items": {
                      "type": "object",
                      "properties": {
                        "role": {
                          "type": "string",
                          "description": "The role of the message's author"
                        },
                        "content": {
                          "type": "string",
                          "description": "The contents of the message"
                        },
                        "name": {
                          "type": "string",
                          "description": "The name of the author of the message"
                        },
                        "function_call": {
                          "type": "object",
                          "description": "The name and arguments of a function that should be called"
                        }
                      }
                    }
                  },
                  "functions": {
                    "type": "array",
                    "items": {
                      "type": "object",
                      "properties": {
                        "name": {
                          "type": "string",
                          "description": "The name of the function to be called"
                        },
                        "description": {
                          "type": "string",
                          "description": "A description explaining what the function does"
                        },
                        "parameters": {
                          "type": "object",
                          "description": "The parameters that the function accepts"
                        },
                        "function_call": {
                          "type": "string",
                          "description": "Controls how the model responds to function calls"
                        }
                      }
                    }
                  },
                  "temperature": {
                    "type": "number",
                    "description": "The sampling temperature to be used"
                  },
                  "top_p": {
                    "type": "number",
                    "description": "An alternative to sampling with temperature"
                  },
                  "n": {
                    "type": "integer",
                    "description": "The number of chat completion choices to generate for each input message"
                  },
                  "stream": {
                    "type": "boolean",
                    "description": "If set to true, it sends partial message deltas"
                  },
                  "stop": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Up to 4 sequences where the API will stop generating further tokens"
                  },
                  "max_tokens": {
                    "type": "integer",
                    "description": "The maximum number of tokens to generate in the chat completion"
                  },
                  "presence_penalty": {
                    "type": "number",
                    "description": "It is used to penalize new tokens based on their existence in the text so far"
                  },
                  "frequency_penalty": {
                    "type": "number",
                    "description": "It is used to penalize new tokens based on their frequency in the text so far"
                  },
                  "logit_bias": {
                    "type": "object",
                    "description": "Used to modify the probability of specific tokens appearing in the completion"
                  },
                  "user": {
                    "type": "string",
                    "description": "A unique identifier representing your end-user"
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "choices": {
                      "type": "array",
                      "items": {
                        "type": "object",
                        "properties": {
                          "finish_reason": {
                            "type": "string"
                          },
                          "index": {
                            "type": "integer"
                          },
                          "message": {
                            "type": "object",
                            "properties": {
                              "role": {
                                "type": "string"
                              },
                              "content": {
                                "type": "string"
                              }
                            }
                          }
                        }
                      }
                    },
                    "created": {
                      "type": "string"
                    },
                    "model": {
                      "type": "string"
                    },
                    "usage": {
                      "type": "object",
                      "properties": {
                        "prompt_tokens": {
                          "type": "integer"
                        },
                        "completion_tokens": {
                          "type": "integer"
                        },
                        "total_tokens": {
                          "type": "integer"
                        }
                      }
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/completions": {
      "post": {
        "summary": "Create completion",
        "responses": {
          "200": {
            "description": "Successful operation"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/models": {
      "get": {
        "summary": "Get models",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    },
    "/ollama_logs": {
      "get": {
        "summary": "Retrieve server logs for ollama models",
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/octet-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary"
                }
              }
            }
          }
        }
      }
    },
    "/": {
      "get": {
        "summary": "Home",
        "responses": {
          "200": {
            "description": "Successful operation"
          }
        }
      }
    }
  }
}
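To make the removed spec above concrete, here is a minimal sketch of a request matching the /chat/completions schema. It assumes the proxy is running locally on port 8000 and that the requests library is installed; the model id is only an example, not something the spec prescribes.

# Illustrative request against the /chat/completions route described by the spec above.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/chat/completions",
    json={
        "model": "gpt-3.5-turbo",  # example model id
        "messages": [{"role": "user", "content": "hello"}],
        "max_tokens": 10,
    },
)
# Per the 200 response schema, the body should contain choices, created, model, and usage.
print(resp.json())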


@@ -1,74 +0,0 @@
import litellm
from fastapi import FastAPI, Request
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json

app = FastAPI(docs_url="/", title="LiteLLM API")
router = APIRouter()

origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

#### API ENDPOINTS ####
@router.post("/v1/models")
@router.get("/models")  # if project requires model list
def model_list():
    all_models = litellm.utils.get_valid_models()
    return dict(
        data=[
            {
                "id": model,
                "object": "model",
                "created": 1677610602,
                "owned_by": "openai",
            }
            for model in all_models
        ],
        object="list",
    )

# for streaming
def data_generator(response):
    print("inside generator")
    for chunk in response:
        print(f"returned chunk: {chunk}")
        yield f"data: {json.dumps(chunk)}\n\n"

@router.post("/v1/completions")
@router.post("/completions")
async def completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completion(request: Request):
    data = await request.json()
    response = litellm.completion(
        **data
    )
    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@router.get("/")
async def home(request: Request):
    return "LiteLLM: RUNNING"

app.include_router(router)
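
For context, a minimal sketch of serving the removed proxy app locally. The module name proxy_app is a placeholder rather than the repository's actual filename, and uvicorn is assumed to be installed.

# Hypothetical launcher; "proxy_app" is a placeholder module name for the file above.
import uvicorn

from proxy_app import app  # assumes the app above is saved as proxy_app.py

if __name__ == "__main__":
    # Serve on the host/port that the test scripts below point at.
    uvicorn.run(app, host="127.0.0.1", port=8000)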


@@ -0,0 +1,39 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
print("making request")
openai.api_key = "anything"  # this gets passed as a header

response = openai.ChatCompletion.create(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_region_name="us-west-2",
    max_tokens=10,
)
print(response)

# response = openai.ChatCompletion.create(
#     model="gpt-3.5-turbo",
#     messages=[
#         {
#             "role": "user",
#             "content": "this is a test message, what model / llm are you"
#         }
#     ],
#     max_tokens=10,
#     stream=True,
# )
# for chunk in response:
#     print(chunk)
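
For parity with the other two test scripts, a sketch of the streaming case for the same bedrock model (the commented-out block above only covers gpt-3.5-turbo). The empty AWS credentials are placeholders, exactly as in the original test.

# Hypothetical streaming variant, mirroring the streaming blocks in the other test scripts.
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "anything"  # passed through as a header by the proxy

response = openai.ChatCompletion.create(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"role": "user", "content": "this is a test message, what model / llm are you"}],
    aws_access_key_id="",       # placeholder, as in the original test
    aws_secret_access_key="",   # placeholder
    aws_region_name="us-west-2",
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)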


@@ -0,0 +1,39 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"
print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
)
print(response)

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)


@@ -0,0 +1,38 @@
import openai

openai.api_base = "http://127.0.0.1:8000"
openai.api_key = "this can be anything"
print("making request")

api_key = ""
response = openai.ChatCompletion.create(
    model="openrouter/google/palm-2-chat-bison",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
)
print(response)

response = openai.ChatCompletion.create(
    model="openrouter/google/palm-2-chat-bison",
    messages=[
        {
            "role": "user",
            "content": "this is a test message, what model / llm are you"
        }
    ],
    api_key=api_key,
    max_tokens=10,
    stream=True,
)
for chunk in response:
    print(chunk)
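
Beyond the chat completion scripts in this commit, a hedged sketch of a similar check against the proxy's /models route (defined in the removed proxy file above). It assumes the proxy is running locally and that the requests library is available; it is illustrative, not one of the committed files.

# Illustrative check: hit /models and verify the OpenAI-style list shape returned by model_list().
import requests

resp = requests.get("http://127.0.0.1:8000/models")
body = resp.json()
assert resp.status_code == 200
assert body["object"] == "list"
for entry in body["data"]:
    print(entry["id"])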