diff --git a/litellm-proxy/openapi.json b/litellm-proxy/openapi.json
deleted file mode 100644
index 955171826..000000000
--- a/litellm-proxy/openapi.json
+++ /dev/null
@@ -1,237 +0,0 @@
-{
-  "openapi": "3.0.0",
-  "info": {
-    "version": "1.0.0",
-    "title": "LiteLLM API",
-    "description": "API for LiteLLM"
-  },
-  "paths": {
-    "/chat/completions": {
-      "post": {
-        "summary": "Create chat completion for 100+ LLM APIs",
-        "requestBody": {
-          "required": true,
-          "content": {
-            "application/json": {
-              "schema": {
-                "type": "object",
-                "properties": {
-                  "model": {
-                    "type": "string",
-                    "description": "ID of the model to use"
-                  },
-                  "messages": {
-                    "type": "array",
-                    "items": {
-                      "type": "object",
-                      "properties": {
-                        "role": {
-                          "type": "string",
-                          "description": "The role of the message's author"
-                        },
-                        "content": {
-                          "type": "string",
-                          "description": "The contents of the message"
-                        },
-                        "name": {
-                          "type": "string",
-                          "description": "The name of the author of the message"
-                        },
-                        "function_call": {
-                          "type": "object",
-                          "description": "The name and arguments of a function that should be called"
-                        }
-                      }
-                    }
-                  },
-                  "functions": {
-                    "type": "array",
-                    "items": {
-                      "type": "object",
-                      "properties": {
-                        "name": {
-                          "type": "string",
-                          "description": "The name of the function to be called"
-                        },
-                        "description": {
-                          "type": "string",
-                          "description": "A description explaining what the function does"
-                        },
-                        "parameters": {
-                          "type": "object",
-                          "description": "The parameters that the function accepts"
-                        },
-                        "function_call": {
-                          "type": "string",
-                          "description": "Controls how the model responds to function calls"
-                        }
-                      }
-                    }
-                  },
-                  "temperature": {
-                    "type": "number",
-                    "description": "The sampling temperature to be used"
-                  },
-                  "top_p": {
-                    "type": "number",
-                    "description": "An alternative to sampling with temperature"
-                  },
-                  "n": {
-                    "type": "integer",
-                    "description": "The number of chat completion choices to generate for each input message"
-                  },
-                  "stream": {
-                    "type": "boolean",
-                    "description": "If set to true, it sends partial message deltas"
-                  },
-                  "stop": {
-                    "type": "array",
-                    "items": {
-                      "type": "string"
-                    },
-                    "description": "Up to 4 sequences where the API will stop generating further tokens"
-                  },
-                  "max_tokens": {
-                    "type": "integer",
-                    "description": "The maximum number of tokens to generate in the chat completion"
-                  },
-                  "presence_penalty": {
-                    "type": "number",
-                    "description": "It is used to penalize new tokens based on their existence in the text so far"
-                  },
-                  "frequency_penalty": {
-                    "type": "number",
-                    "description": "It is used to penalize new tokens based on their frequency in the text so far"
-                  },
-                  "logit_bias": {
-                    "type": "object",
-                    "description": "Used to modify the probability of specific tokens appearing in the completion"
-                  },
-                  "user": {
-                    "type": "string",
-                    "description": "A unique identifier representing your end-user"
-                  }
-                }
-              }
-            }
-          }
-        },
-        "responses": {
-          "200": {
-            "description": "Successful operation",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "type": "object",
-                  "properties": {
-                    "choices": {
-                      "type": "array",
-                      "items": {
-                        "type": "object",
-                        "properties": {
-                          "finish_reason": {
-                            "type": "string"
-                          },
-                          "index": {
-                            "type": "integer"
-                          },
-                          "message": {
-                            "type": "object",
-                            "properties": {
-                              "role": {
-                                "type": "string"
-                              },
-                              "content": {
-                                "type": "string"
-                              }
-                            }
-                          }
-                        }
-                      }
-                    },
-                    "created": {
-                      "type": "string"
-                    },
-                    "model": {
-                      "type": "string"
-                    },
-                    "usage": {
-                      "type": "object",
-                      "properties": {
-                        "prompt_tokens": {
-                          "type": "integer"
-                        },
-                        "completion_tokens": {
-                          "type": "integer"
-                        },
-                        "total_tokens": {
-                          "type": "integer"
-                        }
-                      }
-                    }
-                  }
-                }
-              }
-            }
-          },
-          "500": {
-            "description": "Server error"
-          }
-        }
-      }
-    },
-
-    "/completions": {
-      "post": {
-        "summary": "Create completion",
-        "responses": {
-          "200": {
-            "description": "Successful operation"
-          },
-          "500": {
-            "description": "Server error"
-          }
-        }
-      }
-    },
-    "/models": {
-      "get": {
-        "summary": "Get models",
-        "responses": {
-          "200": {
-            "description": "Successful operation"
-          }
-        }
-      }
-    },
-
-    "/ollama_logs": {
-      "get": {
-        "summary": "Retrieve server logs for ollama models",
-        "responses": {
-          "200": {
-            "description": "Successful operation",
-            "content": {
-              "application/octet-stream": {
-                "schema": {
-                  "type": "string",
-                  "format": "binary"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/": {
-      "get": {
-        "summary": "Home",
-        "responses": {
-          "200": {
-            "description": "Successful operation"
-          }
-        }
-      }
-    }
-  }
-}
diff --git a/litellm-proxy/proxy_server.py b/litellm-proxy/proxy_server.py
deleted file mode 100644
index a9aa07c2d..000000000
--- a/litellm-proxy/proxy_server.py
+++ /dev/null
@@ -1,74 +0,0 @@
-
-import litellm
-from fastapi import FastAPI, Request
-from fastapi.routing import APIRouter
-from fastapi.responses import StreamingResponse, FileResponse
-from fastapi.middleware.cors import CORSMiddleware
-import json
-
-app = FastAPI(docs_url="/", title="LiteLLM API")
-router = APIRouter()
-origins = ["*"]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-#### API ENDPOINTS ####
-@router.post("/v1/models")
-@router.get("/models") # if project requires model list
-def model_list():
-    all_models = litellm.utils.get_valid_models()
-    return dict(
-        data=[
-            {
-                "id": model,
-                "object": "model",
-                "created": 1677610602,
-                "owned_by": "openai",
-            }
-            for model in all_models
-        ],
-        object="list",
-    )
-# for streaming
-def data_generator(response):
-    print("inside generator")
-    for chunk in response:
-        print(f"returned chunk: {chunk}")
-        yield f"data: {json.dumps(chunk)}\n\n"
-
-@router.post("/v1/completions")
-@router.post("/completions")
-async def completion(request: Request):
-    data = await request.json()
-    response = litellm.completion(
-        **data
-    )
-    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-        return StreamingResponse(data_generator(response), media_type='text/event-stream')
-    return response
-
-
-@router.post("/v1/chat/completions")
-@router.post("/chat/completions")
-async def chat_completion(request: Request):
-    data = await request.json()
-    response = litellm.completion(
-        **data
-    )
-    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-        return StreamingResponse(data_generator(response), media_type='text/event-stream')
-    return response
-
-@router.get("/")
-async def home(request: Request):
-    return "LiteLLM: RUNNING"
-
-
-app.include_router(router)
diff --git a/litellm-proxy/tests/test_bedrock.py b/litellm-proxy/tests/test_bedrock.py
new file mode 100644
index 000000000..57816941c
--- /dev/null
+++ b/litellm-proxy/tests/test_bedrock.py
@@ -0,0 +1,39 @@
+import openai
+openai.api_base = "http://127.0.0.1:8000"
+print("making request")
+openai.api_key = "anything" # this gets passed as a header
+
+
+response = openai.ChatCompletion.create(
+    model = "bedrock/anthropic.claude-instant-v1",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+    aws_access_key_id="",
+    aws_secret_access_key="",
+    aws_region_name="us-west-2",
+    max_tokens = 10,
+)
+
+
+print(response)
+
+
+# response = openai.ChatCompletion.create(
+#     model = "gpt-3.5-turbo",
+#     messages = [
+#         {
+#             "role": "user",
+#             "content": "this is a test message, what model / llm are you"
+#         }
+#     ],
+#     max_tokens = 10,
+#     stream=True
+# )
+
+
+# for chunk in response:
+#     print(chunk)
\ No newline at end of file
diff --git a/litellm-proxy/tests/test_openai.py b/litellm-proxy/tests/test_openai.py
new file mode 100644
index 000000000..dfbb2188e
--- /dev/null
+++ b/litellm-proxy/tests/test_openai.py
@@ -0,0 +1,39 @@
+import openai
+openai.api_base = "http://127.0.0.1:8000"
+openai.api_key = "this can be anything"
+
+print("making request")
+
+api_key = ""
+response = openai.ChatCompletion.create(
+    model = "gpt-3.5-turbo",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+    api_key=api_key,
+    max_tokens = 10,
+)
+
+
+print(response)
+
+
+response = openai.ChatCompletion.create(
+    model = "gpt-3.5-turbo",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+    api_key=api_key,
+    max_tokens = 10,
+    stream=True
+)
+
+
+for chunk in response:
+    print(chunk)
\ No newline at end of file
diff --git a/litellm-proxy/tests/test_openrouter.py b/litellm-proxy/tests/test_openrouter.py
new file mode 100644
index 000000000..1102e8e55
--- /dev/null
+++ b/litellm-proxy/tests/test_openrouter.py
@@ -0,0 +1,38 @@
+import openai
+openai.api_base = "http://127.0.0.1:8000"
+openai.api_key = "this can be anything"
+print("making request")
+
+api_key = ""
+response = openai.ChatCompletion.create(
+    model = "openrouter/google/palm-2-chat-bison",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+    api_key=api_key,
+    max_tokens = 10,
+)
+
+
+print(response)
+
+
+response = openai.ChatCompletion.create(
+    model = "openrouter/google/palm-2-chat-bison",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test message, what model / llm are you"
+        }
+    ],
+    api_key=api_key,
+    max_tokens = 10,
+    stream=True
+)
+
+
+for chunk in response:
+    print(chunk)
\ No newline at end of file