forked from phoenix/litellm-mirror

push cli tool

parent a364b36f9e
commit a72880925c

20 changed files with 86 additions and 7 deletions
BIN  dist/litellm-0.1.766-py3-none-any.whl   (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.766.tar.gz             (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.767-py3-none-any.whl   (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.767.tar.gz             (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.768-py3-none-any.whl   (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.768.tar.gz             (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.7681-py3-none-any.whl  (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.7681.tar.gz            (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.7682-py3-none-any.whl  (vendored, new file; binary not shown)
BIN  dist/litellm-0.1.7682.tar.gz            (vendored, new file; binary not shown)
@@ -321,4 +321,5 @@ from .exceptions import (
     BudgetExceededError
 )
 from .budget_manager import BudgetManager
+from .proxy import run_server
BIN  (three additional binary files changed; paths not captured, binaries not shown)
@@ -1,6 +1,6 @@
 import requests
 import json
-
+import traceback
 try:
     from async_generator import async_generator, yield_  # optional dependency
     async_generator_imported = True

@@ -13,7 +13,10 @@ def get_ollama_response_stream(
         model="llama2",
         prompt="Why is the sky blue?"
     ):
-    url = f"{api_base}/api/generate"
+    if api_base.endswith("/api/generate"):
+        url = api_base
+    else:
+        url = f"{api_base}/api/generate"
     data = {
         "model": model,
         "prompt": prompt,

@@ -37,6 +40,7 @@ def get_ollama_response_stream(
                     completion_obj["content"] = j["response"]
                     yield {"choices": [{"delta": completion_obj}]}
             except Exception as e:
+                traceback.print_exc()
                 print(f"Error decoding JSON: {e}")
     session.close()
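Note: the hunks above adjust the Ollama streaming helper get_ollama_response_stream so that an api_base already ending in /api/generate is used as-is, and JSON-decode failures now print a full traceback. A minimal consumption sketch follows; the import path, api_base, model, and prompt are illustrative assumptions, not taken from this diff.

# Sketch only: the import location and the local Ollama endpoint are assumptions.
from litellm.llms.ollama import get_ollama_response_stream  # module path assumed

text = ""
for chunk in get_ollama_response_stream(
    api_base="http://localhost:11434",   # assumed local Ollama address
    model="llama2",
    prompt="Why is the sky blue?",
):
    # Each chunk is shaped {"choices": [{"delta": {"content": ...}}]}, per the hunk above.
    text += chunk["choices"][0]["delta"].get("content", "")
print(text)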
57  litellm/proxy.py  Normal file

@@ -0,0 +1,57 @@
+import litellm
+import click, json
+from dotenv import load_dotenv
+load_dotenv()
+try:
+    from fastapi import FastAPI, Request, status, HTTPException, Depends
+    from fastapi.responses import StreamingResponse
+except:
+    raise ImportError("FastAPI needs to be imported. Run - `pip install fastapi`")
+
+try:
+    import uvicorn
+except:
+    raise ImportError("Uvicorn needs to be imported. Run - `pip install uvicorn`")
+
+app = FastAPI()
+user_api_base = None
+user_model = None
+
+
+# for streaming
+def data_generator(response):
+    for chunk in response:
+        yield f"data: {json.dumps(chunk)}\n\n"
+
+@app.get("/models") # if project requires model list
+def model_list():
+    return dict(
+        data=[
+            {"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}
+        ],
+        object="list",
+    )
+
+@app.post("/chat/completions")
+async def completion(request: Request):
+    data = await request.json()
+    if (user_model is None):
+        raise ValueError("Proxy model needs to be set")
+    data["model"] = user_model
+    if user_api_base:
+        data["api_base"] = user_api_base
+    response = litellm.completion(**data)
+    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+        return StreamingResponse(data_generator(response), media_type='text/event-stream')
+    return response
+
+
+@click.command()
+@click.option('--port', default=8000, help='Port to bind the server to.')
+@click.option('--api_base', default=None, help='API base URL.')
+@click.option('--model', required=True, help='The model name to pass to litellm expects')
+def run_server(port, api_base, model):
+    global user_api_base, user_model
+    user_api_base = api_base
+    user_model = model
+    uvicorn.run(app, host='0.0.0.0', port=port)
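Note: litellm/proxy.py above is the new CLI server. run_server is a click command that starts FastAPI via uvicorn on 0.0.0.0 at --port (default 8000), overwrites every request's "model" with the --model value, and streams responses as server-sent events. A hedged client sketch follows; the model name, URL, and message content are illustrative assumptions, not taken from this diff.

# Sketch only: assumes the proxy was started locally, e.g. with: litellm --model gpt-3.5-turbo
import json
import requests

payload = {"messages": [{"role": "user", "content": "Hello, proxy!"}]}

# Non-streaming call; the proxy replaces any "model" field with the --model value.
resp = requests.post("http://0.0.0.0:8000/chat/completions", json=payload)
print(resp.json())

# Streaming call; data_generator() above emits "data: {...}\n\n" events.
with requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={**payload, "stream": True},
    stream=True,
) as stream_resp:
    for line in stream_resp.iter_lines():
        if line.startswith(b"data: "):
            print(json.loads(line[len(b"data: "):]))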
@@ -47,7 +47,7 @@
 # except Exception as e:
 #     pytest.fail(f"Error occurred: {e}")

-# # test_completion_ollama_stream()
+# test_completion_ollama_stream()


 # async def test_completion_ollama_async_stream():
@@ -2807,7 +2807,6 @@ class CustomStreamWrapper:

     def handle_anthropic_chunk(self, chunk):
         str_line = chunk.decode("utf-8")  # Convert bytes to string
-        print(f"str_line: {str_line}")
         text = ""
         is_finished = False
         finish_reason = None
16  poetry.lock  generated

@@ -246,6 +246,20 @@ files = [
     {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
 ]

+[[package]]
+name = "click"
+version = "8.1.7"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
+    {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
 [[package]]
 name = "colorama"
 version = "0.4.6"

@@ -1060,4 +1074,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8"
-content-hash = "3bbeb410e3403ab479dd1cf37d57b25e6e498e93e84f684109c7217fb94e96aa"
+content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.765"
+version = "0.1.769"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"

@@ -13,6 +13,10 @@ python-dotenv = ">=0.2.0"
 tiktoken = ">=0.4.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
+click = "*"
+
+[tool.poetry.scripts]
+litellm = 'litellm:run_server'

 [build-system]
 requires = ["poetry-core"]
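Note: the [tool.poetry.scripts] entry wires an installed litellm command to litellm:run_server, which resolves because the package __init__ now re-exports it (from .proxy import run_server in the first hunk). As a rough, hedged sketch, the generated console script amounts to:

# Sketch only: not the exact wrapper Poetry generates on install.
from litellm import run_server

if __name__ == "__main__":
    run_server()  # click reads --model, --api_base, and --port from sys.argv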