fix(main.py): fixing print_verbose

Krrish Dholakia 2023-11-04 14:41:34 -07:00
parent 763ecf681a
commit 5b3978eff4
5 changed files with 240 additions and 222 deletions


@@ -141,6 +141,7 @@ def completion(
     litellm_params=None,
     logger_fn=None,
 ):
+    try:
         headers = validate_environment(api_key, headers)
         task = get_hf_task_for_model(model)
         print_verbose(f"{model}, {task}")
@@ -285,7 +286,7 @@ def completion(
             completion_response = response.json()
         except:
             raise HuggingfaceError(
-                message=response.text, status_code=response.status_code
+                message=f"Original Response received: {response.text}; Stacktrace: {traceback.format_exc()}", status_code=response.status_code
             )
         print_verbose(f"response: {completion_response}")
         if isinstance(completion_response, dict) and "error" in completion_response:
@@ -362,6 +363,11 @@ def completion(
         model_response.usage.total_tokens = prompt_tokens + completion_tokens
         model_response._hidden_params["original_response"] = completion_response
         return model_response
+    except HuggingfaceError as e:
+        raise e
+    except Exception as e:
+        import traceback
+        raise HuggingfaceError(status_code=500, message=traceback.format_exc())


 def embedding(
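Taken together, these hunks wrap the body of the Hugging Face completion() in a try/except: JSON-decode failures now report the raw response text plus a stacktrace, a HuggingfaceError raised inside the body propagates unchanged, and any other exception is re-raised as a 500 HuggingfaceError carrying the traceback. A minimal sketch of that wrapping pattern, using a stand-in ProviderError class (not litellm's real exception type) around an arbitrary callable:

    import traceback

    class ProviderError(Exception):  # stand-in for HuggingfaceError
        def __init__(self, status_code, message):
            self.status_code = status_code
            self.message = message
            super().__init__(message)

    def completion_with_error_wrapping(call):
        try:
            return call()
        except ProviderError:
            # already a typed provider error -> let it propagate unchanged
            raise
        except Exception:
            # anything unexpected surfaces as a 500 with the full stacktrace in the message
            raise ProviderError(status_code=500, message=traceback.format_exc())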


@@ -1961,8 +1961,7 @@ def moderation(input: str, api_key: Optional[str]=None):
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        import logging
-        logging.info(f"LiteLLM: {print_statement}")
+        print(print_statement) # noqa

 def config_completion(**kwargs):
     if litellm.config_path != None:
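The likely reason for this change: logging.info() is filtered by the root logger, which defaults to the WARNING level when the host application never configures logging, so setting litellm.set_verbose = True produced no visible output; a plain print() always reaches stdout. A small illustrative comparison (set_verbose here is a local stand-in for the litellm flag):

    import logging

    # With no logging configuration, the root logger's effective level is WARNING,
    # so this INFO-level record is silently dropped:
    logging.info("LiteLLM: this line is not shown")

    set_verbose = True  # stand-in for litellm.set_verbose

    def print_verbose(print_statement):
        if set_verbose:
            print(print_statement)  # noqa: always visible on stdout

    print_verbose("LiteLLM: this line always shows when verbose is on")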


@@ -52,6 +52,7 @@ def is_port_in_use(port):
 @click.command()
 @click.option('--host', default='0.0.0.0', help='Host for the server to listen on.')
 @click.option('--port', default=8000, help='Port to bind the server to.')
+@click.option('--num_workers', default=1, help='Number of uvicorn workers to spin up')
 @click.option('--api_base', default=None, help='API base URL.')
 @click.option('--api_version', default="2023-07-01-preview", help='For azure - pass in the api version.')
 @click.option('--model', '-m', default=None, help='The model name to pass to litellm expects')
@@ -74,17 +75,17 @@ def is_port_in_use(port):
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost):
+def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost, num_workers):
     global feature_telemetry
     args = locals()
     if local:
-        from proxy_server import app, initialize, print_cost_logs, usage_telemetry, add_keys_to_config
+        from proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
         debug = True
     else:
         try:
-            from .proxy_server import app, initialize, print_cost_logs, usage_telemetry, add_keys_to_config
+            from .proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
         except ImportError as e:
-            from proxy_server import app, initialize, print_cost_logs, usage_telemetry, add_keys_to_config
+            from proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
     feature_telemetry = usage_telemetry

     if create_proxy == True:
         repo_url = 'https://github.com/BerriAI/litellm'
@@ -163,7 +164,7 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
     else:
         if headers:
             headers = json.loads(headers)
-        initialize(model=model, alias=alias, api_base=api_base, api_version=api_version, debug=debug, temperature=temperature, max_tokens=max_tokens, request_timeout=request_timeout, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save, config=config)
+        save_worker_config(model=model, alias=alias, api_base=api_base, api_version=api_version, debug=debug, temperature=temperature, max_tokens=max_tokens, request_timeout=request_timeout, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save, config=config)
     try:
         import uvicorn
     except:
@@ -174,7 +175,7 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
     if port == 8000 and is_port_in_use(port):
         port = random.randint(1024, 49152)

-    uvicorn.run(app, host=host, port=port)
+    uvicorn.run("proxy_server:app", host=host, port=port, workers=num_workers)


 if __name__ == "__main__":
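Note on the uvicorn.run() change: the workers argument requires uvicorn to receive an application import string ("module:attr") rather than the app object itself, because each worker is a separate process that must import the app on its own; that is why the call switches from app to "proxy_server:app". A minimal sketch under that assumption (module name proxy_server, as in this repo):

    import uvicorn

    if __name__ == "__main__":
        # The import-string form lets uvicorn spawn multiple worker processes,
        # each of which imports proxy_server and gets its own copy of `app`.
        uvicorn.run("proxy_server:app", host="0.0.0.0", port=8000, workers=4)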


@@ -129,11 +129,12 @@ llm_router: Optional[litellm.Router] = None
 llm_model_list: Optional[list] = None
 server_settings: dict = {}
 log_file = "api_log.json"
+worker_config = None

 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
     global user_debug
+    print(f"user debug value: {user_debug}")
     if user_debug:
         print(print_statement)
@@ -337,6 +338,9 @@ def load_config():
         except:
             pass

+def save_worker_config(**data):
+    import json
+    os.environ["WORKER_CONFIG"] = json.dumps(data)

 def initialize(
     model,
@@ -532,6 +536,7 @@ def litellm_completion(*args, **kwargs):
                 for key, value in m["litellm_params"].items():
                     kwargs[key] = value
                 break
+    print(f"litellm set verbose pre-call: {litellm.set_verbose}")
     if call_type == "chat_completion":
         response = litellm.completion(*args, **kwargs)
     elif call_type == "text_completion":
@@ -540,6 +545,14 @@ def litellm_completion(*args, **kwargs):
             return StreamingResponse(data_generator(response), media_type='text/event-stream')
         return response

+@app.on_event("startup")
+def startup_event():
+    import json
+    worker_config = json.loads(os.getenv("WORKER_CONFIG"))
+    initialize(**worker_config)
+    print(f"\033[32mWorker Initialized\033[0m\n")
+
 #### API ENDPOINTS ####
 @router.get("/v1/models")
 @router.get("/models") # if project requires model list


@@ -285,8 +285,7 @@ class TextCompletionResponse(OpenAIObject):
 ############################################################
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        import logging
-        logging.info(f"LiteLLM: {print_statement}")
+        print(print_statement) # noqa

 ####### LOGGING ###################
 from enum import Enum