forked from phoenix/litellm-mirror
update values
This commit is contained in:
parent b8c8f01221
commit dc9f02267a
8 changed files with 84 additions and 10 deletions

.gitignore (vendored, 2 changes)

@@ -3,4 +3,4 @@
 litellm_uuid.txt
 __pycache__/
 bun.lockb
 .DS_Store
Binary file not shown.
Binary file not shown.

litellm/proxy/.env.template (new file, 19 lines)

@@ -0,0 +1,19 @@
+### KEYS ###
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHER_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+
+### MODEL CUSTOM PROMPT TEMPLATE ###
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
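
For context, a minimal sketch of how these values can be picked up at runtime, assuming python-dotenv is installed and the relevant lines have been uncommented in a local .env (the variable names come from the template above; everything else is illustrative):

import os
from dotenv import load_dotenv

# Load variables from .env into the process environment (a no-op if the file is absent).
load_dotenv()

# Fall back to an empty string when a variable is still commented out.
system_start = os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", "")
pre_prompt = os.getenv("MODEL_PRE_PROMPT", "")
print(system_start, pre_prompt)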

litellm/proxy/.gitignore (vendored, new file, 1 line)

@@ -0,0 +1 @@
+.env

@@ -1,18 +1,36 @@
 import click
+import subprocess
+import os
 from dotenv import load_dotenv

 load_dotenv()

 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
-@click.option('--model', required=True, help='The model name to pass to litellm expects')
+@click.option('--model', default=None, help='The model name to pass to litellm expects')
-@click.option('--deploy', required=True, help='Get a deployed proxy endpoint - api.litellm.ai')
+@click.option('--deploy', is_flag=True, help='Get a deployed proxy endpoint - api.litellm.ai')
 @click.option('--debug', is_flag=True, help='To debug the input')
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, help='Set max tokens for the model')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
+@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
-def run_server(port, api_base, model, debug, temperature, max_tokens, telemetry):
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config):
+    if config:
+        if os.path.exists('.env.template'):
+            if not os.path.exists('.env'):
+                with open('.env.template', 'r') as source:
+                    data = source.read()
+                with open('.env', 'w') as destination:
+                    destination.write(data)
+
+            click.echo('Opening .env file...')
+            subprocess.call(['open', '.env']) # replace `open` with `start` on Windows
+        else:
+            click.echo('No .env.template file found.')
+
-    from .proxy_server import app, initialize
+    # from .proxy_server import app, initialize
+    from proxy_server import app, initialize
     initialize(model, api_base, debug, temperature, max_tokens, telemetry)
     try:
         import uvicorn
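
The new --config branch boils down to "copy .env.template to .env once, then open it". A standalone sketch of the same behaviour (ensure_env_file is a hypothetical helper name, and shutil.copyfile stands in for the read/write pair in the diff; the open call is macOS-specific, as the inline comment notes):

import os
import shutil
import subprocess

def ensure_env_file():
    # Mirrors the --config branch above: create .env from .env.template if needed, then open it.
    if not os.path.exists('.env.template'):
        print('No .env.template file found.')
        return
    if not os.path.exists('.env'):
        shutil.copyfile('.env.template', '.env')
    subprocess.call(['open', '.env'])  # replace `open` with `start` on Windows

ensure_env_file()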

@@ -33,7 +33,6 @@ user_telemetry = False
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
     global user_debug
-    print(f"user_debug: {user_debug}")
     if user_debug:
         print(print_statement)

@@ -79,6 +78,26 @@ async def completion(request: Request):
     data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.text_completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')

@@ -91,7 +110,6 @@ async def chat_completion(request: Request):
     if (user_model is None):
         raise ValueError("Proxy model needs to be set")
     data["model"] = user_model

     # override with user settings
     if user_temperature:
         data["temperature"] = user_temperature

@@ -99,8 +117,26 @@ async def chat_completion(request: Request):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         print("reaches stream")
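
Both register_prompt_template calls wire the .env.template variables into litellm so a model-specific prompt format can be applied per request. As a rough illustration of the intended effect only (sketch_render is a hypothetical helper, not litellm's actual rendering code), each role's pre/post strings wrap its messages and the pre/post prompt values bracket the whole prompt:

import os

def sketch_render(messages):
    # Hypothetical illustration of the template variables' effect on a chat prompt.
    roles = {
        "system": ("MODEL_SYSTEM_MESSAGE_START_TOKEN", "MODEL_SYSTEM_MESSAGE_END_TOKEN"),
        "user": ("MODEL_USER_MESSAGE_START_TOKEN", "MODEL_USER_MESSAGE_END_TOKEN"),
        "assistant": ("MODEL_ASSISTANT_MESSAGE_START_TOKEN", "MODEL_ASSISTANT_MESSAGE_END_TOKEN"),
    }
    prompt = os.getenv("MODEL_PRE_PROMPT", "")
    for message in messages:
        pre, post = roles[message["role"]]
        prompt += os.getenv(pre, "") + message["content"] + os.getenv(post, "")
    return prompt + os.getenv("MODEL_POST_PROMPT", "")

print(sketch_render([{"role": "user", "content": "Hello!"}]))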

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.798"
+version = "0.1.799"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"