diff --git a/dist/litellm-0.1.766-py3-none-any.whl b/dist/litellm-0.1.766-py3-none-any.whl
new file mode 100644
index 000000000..510ec7782
Binary files /dev/null and b/dist/litellm-0.1.766-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.766.tar.gz b/dist/litellm-0.1.766.tar.gz
new file mode 100644
index 000000000..ef04e1197
Binary files /dev/null and b/dist/litellm-0.1.766.tar.gz differ
diff --git a/dist/litellm-0.1.767-py3-none-any.whl b/dist/litellm-0.1.767-py3-none-any.whl
new file mode 100644
index 000000000..f2e035018
Binary files /dev/null and b/dist/litellm-0.1.767-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.767.tar.gz b/dist/litellm-0.1.767.tar.gz
new file mode 100644
index 000000000..06dde10f1
Binary files /dev/null and b/dist/litellm-0.1.767.tar.gz differ
diff --git a/dist/litellm-0.1.768-py3-none-any.whl b/dist/litellm-0.1.768-py3-none-any.whl
new file mode 100644
index 000000000..6a2c8ca79
Binary files /dev/null and b/dist/litellm-0.1.768-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.768.tar.gz b/dist/litellm-0.1.768.tar.gz
new file mode 100644
index 000000000..66d9ec4ce
Binary files /dev/null and b/dist/litellm-0.1.768.tar.gz differ
diff --git a/dist/litellm-0.1.7681-py3-none-any.whl b/dist/litellm-0.1.7681-py3-none-any.whl
new file mode 100644
index 000000000..d7adc78fb
Binary files /dev/null and b/dist/litellm-0.1.7681-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.7681.tar.gz b/dist/litellm-0.1.7681.tar.gz
new file mode 100644
index 000000000..a90e45c98
Binary files /dev/null and b/dist/litellm-0.1.7681.tar.gz differ
diff --git a/dist/litellm-0.1.7682-py3-none-any.whl b/dist/litellm-0.1.7682-py3-none-any.whl
new file mode 100644
index 000000000..47b0ee45a
Binary files /dev/null and b/dist/litellm-0.1.7682-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.7682.tar.gz b/dist/litellm-0.1.7682.tar.gz
new file mode 100644
index 000000000..2719ae2d0
Binary files /dev/null and b/dist/litellm-0.1.7682.tar.gz differ
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 5b48439b4..f02d26e7b 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -321,4 +321,5 @@ from .exceptions import (
     BudgetExceededError
 )
 
-from .budget_manager import BudgetManager
\ No newline at end of file
+from .budget_manager import BudgetManager
+from .proxy import run_server
\ No newline at end of file
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 871c98cbb..240bff6fb 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 4ae00fb4e..c3ec8b87e 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 368e334d6..e5918250d 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 61680fc96..84bf98d32 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -1,6 +1,6 @@
 import requests
 import json
-
+import traceback
 try:
     from async_generator import async_generator, yield_  # optional dependency
     async_generator_imported = True
@@ -13,7 +13,10 @@ def get_ollama_response_stream(
     model="llama2",
     prompt="Why is the sky blue?"
 ):
-    url = f"{api_base}/api/generate"
+    if api_base.endswith("/api/generate"):
+        url = api_base
+    else:
+        url = f"{api_base}/api/generate"
     data = {
         "model": model,
         "prompt": prompt,
@@ -37,6 +40,7 @@ def get_ollama_response_stream(
                     completion_obj["content"] = j["response"]
                     yield {"choices": [{"delta": completion_obj}]}
             except Exception as e:
+                traceback.print_exc()
                 print(f"Error decoding JSON: {e}")
 
     session.close()
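For context on the `ollama.py` hunk above: the change stops `/api/generate` from being appended twice when a caller already passes the full endpoint as `api_base`. A minimal standalone sketch of that normalization logic (the `normalize_ollama_url` helper name is hypothetical, for illustration only; 11434 is Ollama's default port):

```python
def normalize_ollama_url(api_base: str) -> str:
    """Return the full generate endpoint whether or not the caller already appended it."""
    if api_base.endswith("/api/generate"):
        return api_base  # caller passed the full endpoint; use as-is
    return f"{api_base}/api/generate"  # caller passed the bare host; append the route

# Both forms resolve to the same URL:
assert normalize_ollama_url("http://localhost:11434") == "http://localhost:11434/api/generate"
assert normalize_ollama_url("http://localhost:11434/api/generate") == "http://localhost:11434/api/generate"
```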
diff --git a/litellm/proxy.py b/litellm/proxy.py
new file mode 100644
index 000000000..6821a8560
--- /dev/null
+++ b/litellm/proxy.py
@@ -0,0 +1,57 @@
+import litellm
+import click, json
+from dotenv import load_dotenv
+load_dotenv()
+try:
+    from fastapi import FastAPI, Request, status, HTTPException, Depends
+    from fastapi.responses import StreamingResponse
+except:
+    raise ImportError("FastAPI needs to be imported. Run - `pip install fastapi`")
+
+try:
+    import uvicorn
+except:
+    raise ImportError("Uvicorn needs to be imported. Run - `pip install uvicorn`")
+
+app = FastAPI()
+user_api_base = None
+user_model = None
+
+
+# for streaming
+def data_generator(response):
+    for chunk in response:
+        yield f"data: {json.dumps(chunk)}\n\n"
+
+@app.get("/models")  # if project requires model list
+def model_list():
+    return dict(
+        data=[
+            {"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}
+        ],
+        object="list",
+    )
+
+@app.post("/chat/completions")
+async def completion(request: Request):
+    data = await request.json()
+    if (user_model is None):
+        raise ValueError("Proxy model needs to be set")
+    data["model"] = user_model
+    if user_api_base:
+        data["api_base"] = user_api_base
+    response = litellm.completion(**data)
+    if 'stream' in data and data['stream'] == True:  # use generate_responses to stream responses
+        return StreamingResponse(data_generator(response), media_type='text/event-stream')
+    return response
+
+
+@click.command()
+@click.option('--port', default=8000, help='Port to bind the server to.')
+@click.option('--api_base', default=None, help='API base URL.')
+@click.option('--model', required=True, help='The model name to pass to litellm expects')
+def run_server(port, api_base, model):
+    global user_api_base, user_model
+    user_api_base = api_base
+    user_model = model
+    uvicorn.run(app, host='0.0.0.0', port=port)
\ No newline at end of file
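The new `litellm/proxy.py` above exposes OpenAI-style `/models` and `/chat/completions` routes on top of `litellm.completion`, injects the CLI-configured model into every request server-side, and streams responses as server-sent events. A hedged client-side sketch (assumes the proxy is already running locally on the default port 8000; payloads are illustrative):

```python
import requests

BASE_URL = "http://localhost:8000"  # uvicorn binds 0.0.0.0:8000 by default

# Non-streaming call: no "model" field needed, the proxy fills in user_model.
resp = requests.post(
    f"{BASE_URL}/chat/completions",
    json={"messages": [{"role": "user", "content": "Why is the sky blue?"}]},
)
print(resp.json())

# Streaming call: the proxy emits SSE lines of the form "data: {...}".
with requests.post(
    f"{BASE_URL}/chat/completions",
    json={"messages": [{"role": "user", "content": "Hi"}], "stream": True},
    stream=True,
) as stream_resp:
    for line in stream_resp.iter_lines():
        if line:
            print(line.decode("utf-8"))
```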
"click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "colorama" version = "0.4.6" @@ -1060,4 +1074,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "3bbeb410e3403ab479dd1cf37d57b25e6e498e93e84f684109c7217fb94e96aa" +content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd" diff --git a/pyproject.toml b/pyproject.toml index fe60dfb78..9ba00a04d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.765" +version = "0.1.769" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" @@ -13,6 +13,10 @@ python-dotenv = ">=0.2.0" tiktoken = ">=0.4.0" importlib-metadata = ">=6.8.0" tokenizers = "*" +click = "*" + +[tool.poetry.scripts] +litellm = 'litellm:run_server' [build-system] requires = ["poetry-core"]