feat - set custom routing strategy

This commit is contained in:
Ishaan Jaff 2024-06-20 13:49:44 -07:00
parent 3d90b25005
commit b6066d1ece
3 changed files with 45 additions and 1 deletions

View file

@ -865,7 +865,7 @@
},
"deepseek-coder": {
"max_tokens": 4096,
"max_input_tokens": 16000,
"max_input_tokens": 32000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000014,
"output_cost_per_token": 0.00000028,
@ -1984,6 +1984,15 @@
"litellm_provider": "replicate",
"mode": "chat"
},
"openrouter/deepseek/deepseek-coder": {
"max_tokens": 4096,
"max_input_tokens": 32000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000014,
"output_cost_per_token": 0.00000028,
"litellm_provider": "openrouter",
"mode": "chat"
},
"openrouter/microsoft/wizardlm-2-8x22b:nitro": {
"max_tokens": 65536,
"input_cost_per_token": 0.000001,

View file

@ -69,6 +69,7 @@ from litellm.types.router import (
AlertingConfig,
AllowedFailsPolicy,
AssistantsTypedDict,
CustomRoutingStrategy,
Deployment,
DeploymentTypedDict,
LiteLLM_Params,
@ -4814,6 +4815,18 @@ class Router:
except Exception as e:
pass
def set_custom_routing_strategy(self, CustomRoutingStrategy: CustomRoutingStrategy):
    """Install the deployment-selection methods of a custom strategy on this object.

    The ``get_available_deployment`` and ``async_get_available_deployment``
    attributes of the supplied strategy object are read and assigned onto
    ``self`` under the same names, replacing whatever was previously
    reachable there on this instance.

    Args:
        CustomRoutingStrategy: Strategy object exposing both methods. Note
            that inside this function the parameter name shadows the
            imported ``CustomRoutingStrategy`` class.
    """
    # Sync hook first, then async hook -- same order as the attributes are
    # looked up and assigned one after the other.
    for hook_name in ("get_available_deployment", "async_get_available_deployment"):
        replacement = getattr(CustomRoutingStrategy, hook_name)
        setattr(self, hook_name, replacement)
def flush_cache(self):
litellm.cache = None
self.cache.flush_cache()

View file

@ -451,3 +451,25 @@ class ModelGroupInfo(BaseModel):
class AssistantsTypedDict(TypedDict):
    """Typed dictionary pairing a provider name with its LiteLLM parameters.

    NOTE(review): presumably the configuration shape for assistants-API
    routing -- confirm against the code that consumes it.
    """

    # Provider identifier; only "azure" and "openai" are accepted by the type.
    custom_llm_provider: Literal["azure", "openai"]
    # Parameters for that provider, shaped as a LiteLLMParamsTypedDict.
    litellm_params: LiteLLMParamsTypedDict
class CustomRoutingStrategy:
    """Base class describing the two deployment-selection hooks of a routing strategy.

    Both hooks are no-ops here and return ``None``; a concrete strategy is
    expected to subclass this and override them.
    """

    async def async_get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        """Asynchronous deployment-selection hook; does nothing in the base class.

        Args:
            model: Model identifier for the request (presumably the model
                group name -- confirm against the caller).
            messages: Optional list of string-to-string message dicts.
            input: Optional string or list input.
            specific_deployment: Optional flag, ``False`` by default.
            request_kwargs: Optional dict of extra request arguments.

        Returns:
            Always ``None`` in this base implementation.
        """
        return None

    def get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        """Synchronous deployment-selection hook; does nothing in the base class.

        Args:
            model: Model identifier for the request (presumably the model
                group name -- confirm against the caller).
            messages: Optional list of string-to-string message dicts.
            input: Optional string or list input.
            specific_deployment: Optional flag, ``False`` by default.
            request_kwargs: Optional dict of extra request arguments.

        Returns:
            Always ``None`` in this base implementation.
        """
        return None