diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 361908aaf1..e5fdbf802e 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -9,9 +9,10 @@ model_list:
 - litellm_params:
     api_base: http://0.0.0.0:8080
     api_key: ''
-    model: openai/my-fake-model
+    model: gpt-4o
     rpm: 800
-  model_name: gpt-3.5-turbo-fake-model
+    input_cost_per_token: 300
+  model_name: gpt-4o
 - model_name: llama3-70b-8192
   litellm_params:
     model: groq/llama3-70b-8192
diff --git a/litellm/router.py b/litellm/router.py
index 8d9884d1f0..b54d70dbbf 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -67,6 +67,7 @@ from litellm.types.llms.openai import (
     Thread,
 )
 from litellm.types.router import (
+    SPECIAL_MODEL_INFO_PARAMS,
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
@@ -3794,7 +3795,7 @@ class Router:
         deployment = Deployment(
             **model,
             model_name=_model_name,
-            litellm_params=_litellm_params,  # type: ignore
+            litellm_params=LiteLLM_Params(**_litellm_params),
             model_info=_model_info,
         )

diff --git a/litellm/types/router.py b/litellm/types/router.py
index e6864ffe2e..78d516d6c7 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -324,6 +324,9 @@ class DeploymentTypedDict(TypedDict):
     litellm_params: LiteLLMParamsTypedDict


+SPECIAL_MODEL_INFO_PARAMS = ["input_cost_per_token", "output_cost_per_token"]
+
+
 class Deployment(BaseModel):
     model_name: str
     litellm_params: LiteLLM_Params
@@ -342,6 +345,16 @@ class Deployment(BaseModel):
             model_info = ModelInfo()
         elif isinstance(model_info, dict):
             model_info = ModelInfo(**model_info)
+
+        for (
+            key
+        ) in (
+            SPECIAL_MODEL_INFO_PARAMS
+        ):  # ensures custom pricing info is consistently in 'model_info'
+            field = getattr(litellm_params, key, None)
+            if field is not None:
+                setattr(model_info, key, field)
+
         super().__init__(
             model_info=model_info,
             model_name=model_name,
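
Reviewer note: below is a minimal sketch (not part of the patch) of the invariant the Deployment validator now enforces — custom pricing supplied under litellm_params gets mirrored onto model_info, so downstream cost tracking can always read it from one place. The model name and the 0.0003 price are made-up illustration values, and the sketch assumes LiteLLM_Params and ModelInfo accept these keys (both are pydantic models that allow extra fields).

from litellm.types.router import Deployment, LiteLLM_Params

# Custom pricing is passed only via litellm_params (hypothetical values);
# no model_info is supplied, so the validator creates an empty ModelInfo().
deployment = Deployment(
    model_name="gpt-4o",
    litellm_params=LiteLLM_Params(
        model="gpt-4o",
        input_cost_per_token=0.0003,
    ),
)

# The validator copies each key in SPECIAL_MODEL_INFO_PARAMS from
# litellm_params onto model_info, so the custom price shows up there.
assert deployment.model_info.input_cost_per_token == 0.0003

This is also why router.py now wraps the raw dict in LiteLLM_Params(**_litellm_params) instead of passing it through with a type: ignore — the validator's getattr(litellm_params, key, None) expects a typed object rather than a plain dict.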