push cli tool

Krrish Dholakia 2023-09-26 13:30:35 -07:00
parent a364b36f9e
commit a72880925c
20 changed files with 86 additions and 7 deletions

New vendored binary files added under dist/ (contents not shown):

BIN  dist/litellm-0.1.766-py3-none-any.whl
BIN  dist/litellm-0.1.766.tar.gz
BIN  dist/litellm-0.1.767-py3-none-any.whl
BIN  dist/litellm-0.1.767.tar.gz
BIN  dist/litellm-0.1.768-py3-none-any.whl
BIN  dist/litellm-0.1.768.tar.gz
BIN  dist/litellm-0.1.7681-py3-none-any.whl
BIN  dist/litellm-0.1.7681.tar.gz
BIN  dist/litellm-0.1.7682-py3-none-any.whl
BIN  dist/litellm-0.1.7682.tar.gz

View file

@@ -322,3 +322,4 @@ from .exceptions import (
)
from .budget_manager import BudgetManager
from .proxy import run_server

View file

@@ -1,6 +1,6 @@
import requests
import json
import traceback
try:
    from async_generator import async_generator, yield_  # optional dependency
    async_generator_imported = True
@@ -13,6 +13,9 @@ def get_ollama_response_stream(
        model="llama2",
        prompt="Why is the sky blue?"
    ):
    if api_base.endswith("/api/generate"):
        url = api_base
    else:
        url = f"{api_base}/api/generate"
    data = {
        "model": model,
@@ -37,6 +40,7 @@ def get_ollama_response_stream(
                                completion_obj["content"] = j["response"]
                                yield {"choices": [{"delta": completion_obj}]}
                except Exception as e:
                    traceback.print_exc()
                    print(f"Error decoding JSON: {e}")
    session.close()
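
A brief, hypothetical usage sketch for the streaming helper changed above (not part of this commit). The import path and the api_base parameter name are assumptions based on the hunk, and a local Ollama server is assumed to be running; with the new check, api_base may be passed either as the bare host or with the trailing /api/generate.

# Hypothetical consumer of get_ollama_response_stream (import path assumed, not shown in this diff).
from litellm.llms.ollama import get_ollama_response_stream

for chunk in get_ollama_response_stream(
    api_base="http://localhost:11434",  # "http://localhost:11434/api/generate" now resolves to the same URL
    model="llama2",
    prompt="Why is the sky blue?",
):
    # each yielded chunk mirrors the {"choices": [{"delta": ...}]} shape built above
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content", ""), end="", flush=True)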

litellm/proxy.py  (new file, 57 lines)
View file

@@ -0,0 +1,57 @@
import litellm
import click, json
from dotenv import load_dotenv
load_dotenv()
try:
    from fastapi import FastAPI, Request, status, HTTPException, Depends
    from fastapi.responses import StreamingResponse
except:
    raise ImportError("FastAPI needs to be imported. Run - `pip install fastapi`")

try:
    import uvicorn
except:
    raise ImportError("Uvicorn needs to be imported. Run - `pip install uvicorn`")

app = FastAPI()
user_api_base = None
user_model = None

# for streaming
def data_generator(response):
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"

@app.get("/models") # if project requires model list
def model_list():
    return dict(
        data=[
            {"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}
        ],
        object="list",
    )

@app.post("/chat/completions")
async def completion(request: Request):
    data = await request.json()
    if (user_model is None):
        raise ValueError("Proxy model needs to be set")
    data["model"] = user_model
    if user_api_base:
        data["api_base"] = user_api_base
    response = litellm.completion(**data)
    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    return response

@click.command()
@click.option('--port', default=8000, help='Port to bind the server to.')
@click.option('--api_base', default=None, help='API base URL.')
@click.option('--model', required=True, help='The model name to pass to litellm expects')
def run_server(port, api_base, model):
    global user_api_base, user_model
    user_api_base = api_base
    user_model = model
    uvicorn.run(app, host='0.0.0.0', port=port)
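
For context, a hypothetical client-side sketch (not part of the commit): once the proxy is running, e.g. via the console script added in pyproject.toml below (`litellm --model <model-name>`) or by calling run_server directly, the /chat/completions route accepts an OpenAI-style payload and the proxy injects the configured model itself. Host, port, and prompt here are placeholders.

# Hypothetical client for the proxy defined above, assuming it listens on the default port 8000.
import requests

# Non-streaming chat request; "model" can be omitted because the proxy sets data["model"] = user_model.
resp = requests.post(
    "http://localhost:8000/chat/completions",
    json={"messages": [{"role": "user", "content": "Hello, which model is this?"}]},
)
print(resp.json())

# The /models route added for OpenAI-compatible clients that expect a model list.
print(requests.get("http://localhost:8000/models").json())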

View file

@@ -47,7 +47,7 @@
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# # test_completion_ollama_stream()
# test_completion_ollama_stream()
# async def test_completion_ollama_async_stream():

View file

@@ -2807,7 +2807,6 @@ class CustomStreamWrapper:
    def handle_anthropic_chunk(self, chunk):
        str_line = chunk.decode("utf-8")  # Convert bytes to string
        print(f"str_line: {str_line}")
        text = ""
        is_finished = False
        finish_reason = None

poetry.lock  (generated; 16 changed lines)
View file

@@ -246,6 +246,20 @@ files = [
    {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
]

[[package]]
name = "click"
version = "8.1.7"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.7"
files = [
    {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
    {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
]

[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}

[[package]]
name = "colorama"
version = "0.4.6"

@@ -1060,4 +1074,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "3bbeb410e3403ab479dd1cf37d57b25e6e498e93e84f684109c7217fb94e96aa"
content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd"

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.765"
version = "0.1.769"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"
@@ -13,6 +13,10 @@ python-dotenv = ">=0.2.0"
tiktoken = ">=0.4.0"
importlib-metadata = ">=6.8.0"
tokenizers = "*"
click = "*"

[tool.poetry.scripts]
litellm = 'litellm:run_server'

[build-system]
requires = ["poetry-core"]
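
As a final illustration (not part of the diff): the new [tool.poetry.scripts] entry points the `litellm` console command at the Click command re-exported from litellm/__init__.py, so after installation the CLI is roughly equivalent to the programmatic call sketched below. The model name is a placeholder; port 8000 matches the option default.

# Rough equivalent of running `litellm --model gpt-3.5-turbo --port 8000` from a shell,
# assuming the package (with click, fastapi, and uvicorn available) is installed.
from litellm import run_server  # re-exported via `from .proxy import run_server`

if __name__ == "__main__":
    # Click commands accept an explicit argv list; standalone_mode=False avoids sys.exit().
    run_server(["--model", "gpt-3.5-turbo", "--port", "8000"], standalone_mode=False)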