forked from phoenix/litellm-mirror
Merge pull request #1182 from sumanth13131/usage-based-routing-fix
usage_based_routing_fix
Commit 2df5ce4b7c
2 changed files with 53 additions and 2 deletions
@@ -861,9 +861,21 @@ class Router:
         lowest_tpm = float("inf")
         deployment = None

+
+        # load model context map
+        models_context_map = litellm.model_cost
+
         # return deployment with lowest tpm usage
         for item in potential_deployments:
-            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=item["litellm_params"]["model"])
+            deployment_name = item["litellm_params"]["model"]
+            custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
+            if custom_llm_provider is not None:
+                deployment_name = f"{custom_llm_provider}/{deployment_name}"
+            else:
+                litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
+                if litellm_provider is not None:
+                    deployment_name = f"{litellm_provider}/{deployment_name}"
+            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)

             if item_tpm == 0:
                 return item
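For readers skimming the diff, the core of the fix is how the usage key passed to _get_deployment_usage is now derived: an explicit custom_llm_provider wins, otherwise the provider recorded in litellm.model_cost is used, otherwise the bare model name. A minimal standalone sketch of that derivation follows (resolve_usage_key is a hypothetical helper name for illustration only, not part of the Router API):

    # Sketch only, not the actual Router code.
    import litellm

    def resolve_usage_key(litellm_params: dict) -> str:
        # Start from the raw model name configured for the deployment.
        deployment_name = litellm_params["model"]
        # An explicitly configured custom_llm_provider takes precedence.
        custom_llm_provider = litellm_params.get("custom_llm_provider", None)
        if custom_llm_provider is not None:
            return f"{custom_llm_provider}/{deployment_name}"
        # Otherwise fall back to the provider recorded in litellm's model cost map.
        litellm_provider = litellm.model_cost.get(deployment_name, {}).get("litellm_provider", None)
        if litellm_provider is not None:
            return f"{litellm_provider}/{deployment_name}"
        # No provider information available: keep the bare model name.
        return deployment_name

    # A params dict like the one in the new test would resolve to
    # "Custom-LLM/gpt-3.5-turbo-0301".
    print(resolve_usage_key({"model": "gpt-3.5-turbo-0301", "custom_llm_provider": "Custom-LLM"}))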
@@ -125,3 +125,42 @@ async def test_acompletion_caching_on_router_caching_groups():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error occurred: {e}")
+
+def test_usage_based_routing_completion():
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+                "custom_llm_provider": "Custom-LLM"
+            },
+            "tpm": 10000,
+            "rpm": 5
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "tpm": 10000,
+            "rpm": 5
+        }
+    ]
+    router = Router(model_list= model_list,
+                    routing_strategy= "usage-based-routing",
+                    set_verbose= False)
+    max_requests = 5
+    while max_requests > 0:
+        try:
+            router.completion(
+                model='gpt-3.5-turbo',
+                messages=[{"content": "write a one sentence poem.", "role": "user"}],
+            )
+        except ValueError as e:
+            traceback.print_exc()
+            pytest.fail(f"Error occurred: {e}")
+        finally:
+            max_requests -= 1
+    router.reset()
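To exercise just this new test locally, running pytest with -k test_usage_based_routing_completion against the modified test file should work, provided OPENAI_API_KEY is set in the environment (the test reads it via os.getenv).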