From 51e5e2b8d500b246f61b852a8eb0bd21012ccc04 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 7 Oct 2023 17:28:55 -0700
Subject: [PATCH] docs(proxy_server): doc cleanup

---
 .gitignore                           |   3 +-
 docs/my-website/docs/proxy_server.md | 151 +++++++++++++++------------
 litellm/proxy/proxy_server.py        |  19 +++-
 3 files changed, 105 insertions(+), 68 deletions(-)

diff --git a/.gitignore b/.gitignore
index 43c253051..836330d07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ litellm_uuid.txt
 __pycache__/
 bun.lockb
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store
+.aider*
diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md
index 13fe5243c..263bee172 100644
--- a/docs/my-website/docs/proxy_server.md
+++ b/docs/my-website/docs/proxy_server.md
@@ -3,38 +3,25 @@ import TabItem from '@theme/TabItem';
 
 # OpenAI Proxy Server
 
-CLI Tool to create a LLM Proxy Server to translate openai api calls to any non-openai model (e.g. Huggingface, TogetherAI, Ollama, etc.) 100+ models [Provider List](https://docs.litellm.ai/docs/providers).
+A local, fast, and lightweight OpenAI-compatible server to call 100+ LLM APIs.
 
-## Quick start
-Call Ollama models through your OpenAI proxy.
-
-### Start Proxy
+## Usage
 ```shell
-$ pip install litellm
+pip install litellm
 ```
 
 ```shell
-$ litellm --model ollama/llama2
+$ litellm --model ollama/codellama
 
 #INFO: Uvicorn running on http://0.0.0.0:8000
 ```
-This will host a local proxy api at: **http://0.0.0.0:8000**
+### Test
+In a new shell, run:
+```shell
+$ litellm --test
+```
 
-Let's see if it works
-```shell
-$ curl --location 'http://0.0.0.0:8000/chat/completions' \
---header 'Content-Type: application/json' \
---data '{
-    "messages": [
-      {
-        "role": "user",
-        "content": "what do you know?"
-      }
-    ],
-}'
-```
-
-### Replace OpenAI Base
+### Replace OpenAI base
 
 ```python
 import openai
@@ -145,6 +132,81 @@ $ litellm --model command-nightly
 
 [**Jump to Code**](https://github.com/BerriAI/litellm/blob/fef4146396d5d87006259e00095a62e3900d6bb4/litellm/proxy.py#L36)
 
+## Tutorial: Use with Aider/AutoGen/Continue-Dev
+
+Here's how to use the proxy to test codellama/mistral/etc. models on different GitHub repos.
+
+```shell
+pip install litellm
+```
+
+```shell
+$ ollama pull codellama # our local CodeLlama
+
+$ litellm --model ollama/codellama --temperature 0.3 --max_tokens 2048
+```
+
+Setup for each tool:
+
+### Aider
+```shell
+$ pip install aider
+
+$ aider --openai-api-base http://0.0.0.0:8000 --openai-api-key fake-key
+```
+
+### Continue-Dev
+Continue-Dev brings ChatGPT to VSCode. See how to [install it here](https://continue.dev/docs/quickstart).
+
+In the [config.py](https://continue.dev/docs/reference/Models/openai), set this as your default model.
+```python
+    default=OpenAI(
+        api_key="IGNORED",
+        model="fake-model-name",
+        context_length=2048,
+        api_base="http://your_litellm_hostname:8000"
+    ),
+```
+
+Credits [@vividfog](https://github.com/jmorganca/ollama/issues/305#issuecomment-1751848077) for this tutorial.
+
+### AutoGen
+```shell
+pip install pyautogen
+```
+
+```python
+from autogen import AssistantAgent, UserProxyAgent, oai
+config_list=[
+    {
+        "model": "my-fake-model",
+        "api_base": "http://localhost:8000/v1",  # litellm compatible endpoint
+        "api_type": "open_ai",
+        "api_key": "NULL", # just a placeholder
+    }
+]
+
+response = oai.Completion.create(config_list=config_list, prompt="Hi")
+print(response) # works fine
+
+assistant = AssistantAgent("assistant")
+user_proxy = UserProxyAgent("user_proxy")
+user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stock price change YTD.", config_list=config_list)
+# fails with the error: openai.error.AuthenticationError: No API key provided.
+```
+
+Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial.
+
+
+
+:::note
+**Contribute:** Using this server with a project? Contribute your tutorial here!
+
+:::
+
 ## Configure Model
 
 To save api keys and/or customize model prompt, run:
@@ -207,44 +269,3 @@ This will host a ChatCompletions API at: https://api.litellm.ai/44508ad4
-
-## Tutorial - using HuggingFace LLMs with aider
-[Aider](https://github.com/paul-gauthier/aider) is an AI pair programming in your terminal.
-
-But it only accepts OpenAI API Calls.
-
-In this tutorial we'll use Aider with WizardCoder (hosted on HF Inference Endpoints).
-
-[NOTE]: To learn how to deploy a model on Huggingface
-
-### Step 1: Install aider and litellm
-```shell
-$ pip install aider-chat litellm
-```
-
-### Step 2: Spin up local proxy
-Save your huggingface api key in your local environment (can also do this via .env)
-
-```shell
-$ export HUGGINGFACE_API_KEY=my-huggingface-api-key
-```
-
-Point your local proxy to your model endpoint
-
-```shell
-$ litellm \
-  --model huggingface/WizardLM/WizardCoder-Python-34B-V1.0 \
-  --api_base https://my-endpoint.huggingface.com
-```
-This will host a local proxy api at: **http://0.0.0.0:8000**
-
-### Step 3: Replace openai api base in Aider
-Aider lets you set the openai api base. So lets point it to our proxy instead.
-
-```shell
-$ aider --openai-api-base http://0.0.0.0:8000
-```
-
-
-And that's it!
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index c2e7334af..ef0ecbbf2 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1,4 +1,4 @@
-import sys, os
+import sys, os, platform
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
@@ -19,7 +19,7 @@ print()
 import litellm
 from fastapi import FastAPI, Request
 from fastapi.routing import APIRouter
-from fastapi.responses import StreamingResponse
+from fastapi.responses import StreamingResponse, FileResponse
 import json
 
 app = FastAPI()
@@ -203,4 +203,19 @@ async def chat_completion(request: Request):
     print_verbose(f"response: {response}")
     return response
 
+
+@router.get("/ollama_logs")
+async def retrieve_server_log(request: Request):
+    filepath = os.path.expanduser('~/.ollama/logs/server.log')
+    return FileResponse(filepath)
+
+# @router.get("/ollama_logs")
+# async def chat_completion(request: Request):
+#     if platform.system() == "Darwin":
+#         print("This is a MacOS system.")
+#     elif platform.system() == "Linux":
+#         print("This is a Linux system.")
+#     else:
+#         print("This is an unknown operating system.")
+
 app.include_router(router)
\ No newline at end of file
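As a quick sanity check for the new `/ollama_logs` route added in this patch, here is a minimal client sketch. It assumes the proxy is running on the default `http://0.0.0.0:8000` shown in the docs above, that `~/.ollama/logs/server.log` exists on the proxy host, and that the third-party `requests` package is installed; none of these assumptions come from the patch itself.

```python
# Minimal sketch: fetch the Ollama server log through the new /ollama_logs route.
# Assumes the proxy from this patch is running locally on port 8000 and that
# ~/.ollama/logs/server.log exists on the machine serving the request.
import requests

resp = requests.get("http://0.0.0.0:8000/ollama_logs", timeout=10)
resp.raise_for_status()

# FileResponse returns the raw log file; print the last few lines.
for line in resp.text.splitlines()[-20:]:
    print(line)
```

Because the route simply streams the file back, any HTTP client (curl, a browser) works equally well.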