forked from phoenix/litellm-mirror
fix(proxy_cli.py): don't double load the router config
was causing callbacks to be instantiated twice, double-counting usage in the cache
parent d8da4cf8bb
commit b2741933dc
5 changed files with 12 additions and 16 deletions
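The bug: loading the router config twice runs callback setup twice, so a success callback such as `prometheus` gets registered once per load and every request is counted twice. A minimal sketch of that failure mode (hypothetical loader, not litellm's actual code):

```python
# Hypothetical sketch of the failure mode: a loader that appends callbacks
# unconditionally registers duplicates when it runs twice.
success_callbacks: list = []

def load_config(config: dict) -> None:
    for name in config.get("success_callback", []):
        success_callbacks.append(name)  # no "already registered?" guard

load_config({"success_callback": ["prometheus"]})
load_config({"success_callback": ["prometheus"]})  # the double load this commit removes
assert success_callbacks == ["prometheus", "prometheus"]  # every event now logged twice
```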
@@ -26,13 +26,12 @@ litellm_settings:
  success_callback: ["prometheus"]
  upperbound_key_generate_params:
    max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

# litellm_settings:
#   drop_params: True
#   max_budget: 800021
#   budget_duration: 30d
#   # cache: true

router_settings:
  routing_strategy: usage-based-routing
  redis_host: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
  redis_password: madeBerri@992
  redis_port: 16337

general_settings:
  master_key: sk-1234
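The `os.environ/...` value above is litellm's convention for pulling a config setting from an environment variable at load time; a minimal sketch of how such a reference can be resolved (hypothetical helper, not the proxy's actual implementation):

```python
import os

def resolve_env_ref(value):
    """Resolve config values written as 'os.environ/NAME' from the environment."""
    if isinstance(value, str) and value.startswith("os.environ/"):
        return os.environ.get(value.split("/", 1)[1])
    return value

# e.g. max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
max_budget = resolve_env_ref("os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET")
```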
@@ -425,9 +425,10 @@ def run_server(
         )

         proxy_config = ProxyConfig()
-        _, _, general_settings = asyncio.run(
-            proxy_config.load_config(router=None, config_file_path=config)
-        )
+        _config = asyncio.run(proxy_config.get_config(config_file_path=config))
+        general_settings = _config.get("general_settings", {})
+        if general_settings is None:
+            general_settings = {}
         database_url = general_settings.get("database_url", None)
         db_connection_pool_limit = general_settings.get(
             "database_connection_pool_limit", 100
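The fix replaces the side-effectful `load_config` (which parses the file and also instantiates callbacks, work the proxy server repeats at startup) with the read-only `get_config`. A simplified sketch of the two shapes, with method bodies assumed for illustration:

```python
import asyncio

class ProxyConfigSketch:
    """Stand-in for litellm's ProxyConfig; bodies are illustrative only."""

    async def get_config(self, config_file_path=None) -> dict:
        # Read-only: parse and return the config dict, no side effects.
        return {"general_settings": None}  # key present but left empty in the YAML

    async def load_config(self, router=None, config_file_path=None):
        # Heavyweight: parses the config AND registers callbacks; calling it
        # here and again at server startup is the double load being fixed.
        config = await self.get_config(config_file_path=config_file_path)
        return router, config.get("model_list"), config.get("general_settings")

proxy_config = ProxyConfigSketch()
_config = asyncio.run(proxy_config.get_config(config_file_path="config.yaml"))
general_settings = _config.get("general_settings", {})
if general_settings is None:
    general_settings = {}
```

The explicit `is None` check matters because `dict.get("general_settings", {})` still returns `None` when the key exists in the YAML but has no value; the default only applies when the key is missing entirely.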
@@ -2335,6 +2335,7 @@ class ProxyConfig:
            "background_health_checks", False
        )
        health_check_interval = general_settings.get("health_check_interval", 300)

        router_params: dict = {
            "cache_responses": litellm.cache
            != None,  # cache if user passed in cache values
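For reference, a sketch of the settings derived in this hunk, with stand-in values and partly assumed variable names (and the idiomatic `is not None` in place of `!= None`):

```python
# Stand-ins for general_settings and litellm.cache from the hunk above.
general_settings: dict = {}
cache = None

use_background_health_checks = general_settings.get("background_health_checks", False)
health_check_interval = general_settings.get("health_check_interval", 300)  # seconds

router_params: dict = {
    "cache_responses": cache is not None,  # cache only if the user configured a cache
}
```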
@@ -2374,7 +2374,6 @@ class Router:
        """
        Returns the deployment based on routing strategy
        """

        # users need to explicitly call a specific deployment, by setting `specific_deployment = True` as a completion()/embedding() kwarg
        # when this was not explicit, we had several issues with fallbacks timing out
        if specific_deployment == True:
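As the comment describes, callers opt into pinning one deployment instead of letting the routing strategy choose. A usage sketch (model and deployment names are made up for illustration):

```python
from litellm import Router

# Hypothetical model list; deployment names are invented.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "azure/my-gpt-35-deployment", "api_key": "sk-..."},
    }
]
router = Router(model_list=model_list)

# Default path: the routing strategy (e.g. usage-based-routing) picks the deployment.
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)

# Explicit path: call one deployment directly, per the comment in the hunk above.
response = router.completion(
    model="azure/my-gpt-35-deployment",
    messages=[{"role": "user", "content": "hi"}],
    specific_deployment=True,
)
```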
@@ -236,7 +236,7 @@ class HiddenParams(OpenAIObject):

    class Config:
        extra = "allow"
        protected_namespaces = ()

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
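A minimal sketch of the pattern the comment describes, on a stand-in pydantic model (not litellm's actual class):

```python
from pydantic import BaseModel

class HiddenParamsSketch(BaseModel):
    """Stand-in for litellm's HiddenParams: extra attrs allowed, dict-style reads."""

    class Config:
        extra = "allow"
        protected_namespaces = ()  # silence pydantic's model_* namespace warnings

    def get(self, key, default=None):
        # Dict-style access: return the attribute if present, else the default.
        return getattr(self, key, default)

params = HiddenParamsSketch(model_id="abc")  # extra field, permitted by Config
print(params.get("model_id"))       # -> "abc"
print(params.get("missing", None))  # -> None
```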
@@ -1990,9 +1990,6 @@ class Logging:
        else:
            litellm.cache.add_cache(result, **kwargs)
        if isinstance(callback, CustomLogger):  # custom logger class
            print_verbose(
                f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
            )
            if self.stream == True:
                if (
                    "async_complete_streaming_response"
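For streaming calls, this path gates the success callback on `async_complete_streaming_response` being present, i.e. the callback should fire once for the assembled stream rather than once per chunk. A simplified sketch of that shape (names mirror the hunk; the logic is an assumption):

```python
class LoggingSketch:
    """Stand-in for litellm's Logging async success path."""

    def __init__(self, stream: bool):
        self.stream = stream
        self.model_call_details: dict = {}

    async def async_success_handler(self, result, callback) -> None:
        if self.stream:
            # Only fire once the joined streaming response has been recorded;
            # otherwise each chunk would be logged (and counted) separately.
            if "async_complete_streaming_response" not in self.model_call_details:
                return
            result = self.model_call_details["async_complete_streaming_response"]
        await callback(result)  # callback: any async logger
```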
@@ -2376,7 +2373,6 @@ def client(original_function):
        if litellm.use_client or (
            "use_client" in kwargs and kwargs["use_client"] == True
        ):
            print_verbose(f"litedebugger initialized")
            if "lite_debugger" not in litellm.input_callback:
                litellm.input_callback.append("lite_debugger")
            if "lite_debugger" not in litellm.success_callback:
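Note the `not in` guard before each `append`: a callback is only registered if it is absent. That idempotency is exactly the invariant the double config load in proxy_cli.py was violating for the proxy's success callbacks.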