Merge pull request #1182 from sumanth13131/usage-based-routing-fix

usage_based_routing_fix
This commit is contained in:
Krish Dholakia 2023-12-23 11:50:34 +05:30 committed by GitHub
commit 2df5ce4b7c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 2 deletions

View file

@ -860,10 +860,22 @@ class Router:
# ----------------------
lowest_tpm = float("inf")
deployment = None
# load model context map
models_context_map = litellm.model_cost
# return deployment with lowest tpm usage
for item in potential_deployments:
item_tpm, item_rpm = self._get_deployment_usage(deployment_name=item["litellm_params"]["model"])
deployment_name=item["litellm_params"]["model"]
custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
if custom_llm_provider is not None:
deployment_name = f"{custom_llm_provider}/{deployment_name}"
else:
litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
if litellm_provider is not None:
deployment_name = f"{litellm_provider}/{deployment_name}"
item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
if item_tpm == 0:
return item

View file

@ -124,4 +124,43 @@ async def test_acompletion_caching_on_router_caching_groups():
pass
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
pytest.fail(f"Error occurred: {e}")
def test_usage_based_routing_completion():
    """Send five completions through a usage-based router with two deployments.

    Both deployments are registered under the "gpt-3.5-turbo" alias and point
    at the same underlying model; only the first declares a
    ``custom_llm_provider``. The test fails if any completion call raises
    ``ValueError``.
    """
    api_key = os.getenv("OPENAI_API_KEY")

    # Deployment that carries an explicit custom provider.
    deployment_with_provider = {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo-0301",
            "api_key": api_key,
            "custom_llm_provider": "Custom-LLM",
        },
        "tpm": 10000,
        "rpm": 5,
    }
    # Same model, but with no provider declared on the deployment.
    deployment_without_provider = {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo-0301",
            "api_key": api_key,
        },
        "tpm": 10000,
        "rpm": 5,
    }
    model_list = [deployment_with_provider, deployment_without_provider]

    router = Router(
        model_list=model_list,
        routing_strategy="usage-based-routing",
        set_verbose=False,
    )

    prompt = [{"content": "write a one sentence poem.", "role": "user"}]
    total_requests = 5
    for _ in range(total_requests):
        try:
            router.completion(model="gpt-3.5-turbo", messages=prompt)
        except ValueError as e:
            traceback.print_exc()
            pytest.fail(f"Error occurred: {e}")
        finally:
            # NOTE(review): reset is performed after every request, whether it
            # succeeded or not; confirm against upstream that a per-iteration
            # reset (rather than one reset after the loop) is intended.
            router.reset()