Mirror of https://github.com/BerriAI/litellm.git
build: merge litellm_dev_03_01_2025_p2
This commit is contained in:
parent 87dd195b51
commit 8ea3d4c046
11 changed files with 124 additions and 24 deletions
@@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
 def langfuse_client_init(
     langfuse_public_key=None,
     langfuse_secret=None,
+    langfuse_secret_key=None,
     langfuse_host=None,
     flush_interval=1,
 ) -> LangfuseClass:
@@ -67,7 +68,10 @@ def langfuse_client_init(
     )

     # Instance variables
-    secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
+    secret_key = (
+        langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
+    )
     public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_host = langfuse_host or os.getenv(
         "LANGFUSE_HOST", "https://cloud.langfuse.com"
@@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
@@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
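For reference, the new `langfuse_secret_key` parameter added above acts as an alias for `langfuse_secret` when the Langfuse client is initialized. A minimal sketch of the resolution order implied by the hunks (a standalone helper, not the actual module code):

import os
from typing import Optional

# Sketch: an explicit langfuse_secret wins, then the new langfuse_secret_key
# alias, then the LANGFUSE_SECRET_KEY environment variable.
def resolve_langfuse_secret(
    langfuse_secret: Optional[str] = None,
    langfuse_secret_key: Optional[str] = None,
) -> Optional[str]:
    return (
        langfuse_secret
        or langfuse_secret_key
        or os.getenv("LANGFUSE_SECRET_KEY")
    )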
File diff suppressed because one or more lines are too long
@@ -1,11 +1,9 @@
 model_list:
-  - model_name: openai-gpt-4o-mini-2024-07-18
+  - model_name: my-langfuse-model
     litellm_params:
-      model: openai/gpt-4o-mini-2024-07-18
-      configurable_clientside_auth_params: ["api_key"]
-      api_key: "my-bad-key"
-  # - model_name: openai-fallback-model
-  #   litellm_params:
-  #     model: openai/gpt-3.5-turbo
+      model: langfuse/openai-model
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai-model
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
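The config change above replaces the demo OpenAI entry with a Langfuse prompt-management model (`my-langfuse-model`, backed by `langfuse/openai-model`) plus a plain `openai-model` deployment. As a rough usage sketch against a LiteLLM proxy running this config (the base_url and api_key are placeholders, not part of the commit):

import openai

# Hypothetical client pointed at a locally running LiteLLM proxy.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="my-langfuse-model",  # resolves to langfuse/openai-model per the config
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)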
@@ -1003,6 +1003,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
 class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
     success_callback: Optional[List[str]] = []
     failure_callback: Optional[List[str]] = []
+    callbacks: Optional[List[str]] = []
     # for now - only supported for langfuse
     callback_vars: Optional[Dict[str, str]] = {}

@@ -1015,6 +1016,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
         failure_callback = values.get("failure_callback", [])
         if failure_callback is None:
             values.pop("failure_callback", None)
+        callbacks = values.get("callbacks", [])
+        if callbacks is None:
+            values.pop("callbacks", None)

         callback_vars = values.get("callback_vars", {})
         if callback_vars is None:
@@ -1023,6 +1027,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
             return {
                 "success_callback": [],
                 "failure_callback": [],
+                "callbacks": [],
                 "callback_vars": {},
             }
         valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
@@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(

         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
-    elif data.callback_type == "success_and_failure":
+    elif (
+        not data.callback_type or data.callback_type == "success_and_failure"
+    ):  # assume 'success_and_failure' = litellm.callbacks
         if team_callback_settings_obj.success_callback is None:
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
+        if team_callback_settings_obj.callbacks is None:
+            team_callback_settings_obj.callbacks = []

         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)
@@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)

+    if data.callback_name not in team_callback_settings_obj.callbacks:
+        team_callback_settings_obj.callbacks.append(data.callback_name)
+
     for var, value in data.callback_vars.items():
         if team_callback_settings_obj.callback_vars is None:
             team_callback_settings_obj.callback_vars = {}
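Taken together, the three hunks above add a generic `callbacks` list next to `success_callback` and `failure_callback`, and treat a missing `callback_type` the same as "success_and_failure". A rough sketch of the metadata shape that results (field names are from the diff; the values are purely illustrative):

# Illustrative only: the shape TeamCallbackMetadata ends up holding when an
# untyped callback such as "langfuse" is registered for a team.
team_callback_metadata = {
    "success_callback": ["langfuse"],
    "failure_callback": ["langfuse"],
    "callbacks": ["langfuse"],  # new field, filled when callback_type is unset
    "callback_vars": {
        "langfuse_public_key": "pk-lf-example",
        "langfuse_secret_key": "sk-lf-example",
        "langfuse_host": "https://cloud.langfuse.com",
    },
}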
@@ -1750,6 +1750,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "prompt"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -1863,6 +1864,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "prompt"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )

         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1961,6 +1963,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "prompt"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         data = deployment["litellm_params"].copy()
@@ -2036,6 +2039,7 @@ class Router:
         deployment = await self.async_get_available_deployment(
             model=model,
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
         data = deployment["litellm_params"].copy()
@@ -2080,6 +2084,7 @@ class Router:
             model=model,
             messages=messages,
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )

         data = deployment["litellm_params"].copy()
@@ -2185,6 +2190,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": prompt}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2283,6 +2289,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "default text"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2452,6 +2459,7 @@ class Router:
             model=model,
             input=input,
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
         data = deployment["litellm_params"].copy()
@@ -2549,6 +2557,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "files-api-fake-text"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2654,6 +2663,7 @@ class Router:
             model=model,
             messages=[{"role": "user", "content": "files-api-fake-text"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
+            request_kwargs=kwargs,
         )
         metadata_variable_name = _get_router_metadata_variable_name(
             function_name="_acreate_batch"
@@ -2850,7 +2860,8 @@ class Router:
     ):
         if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
-                model=kwargs["model"]
+                model=kwargs["model"],
+                request_kwargs=kwargs,
             )
             kwargs["model"] = deployment["litellm_params"]["model"]
         return await original_function(**kwargs)
@@ -5590,10 +5601,10 @@ class Router:
     async def async_get_available_deployment(
         self,
         model: str,
+        request_kwargs: Dict,
         messages: Optional[List[Dict[str, str]]] = None,
         input: Optional[Union[str, List]] = None,
         specific_deployment: Optional[bool] = False,
-        request_kwargs: Optional[Dict] = None,
     ):
         """
         Async implementation of 'get_available_deployments'.
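The signature change above makes `request_kwargs` a required parameter of `async_get_available_deployment` rather than an optional trailing one, which is why every internal call site in the preceding hunks now passes `request_kwargs=kwargs`. A hedged sketch of a direct call under the new signature (the model list is a placeholder; mock_response avoids a real provider call):

import asyncio
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {"model": "gpt-4o", "mock_response": "hi"},
        }
    ]
)

async def pick_deployment() -> None:
    deployment = await router.async_get_available_deployment(
        model="gpt-4",
        request_kwargs={},  # now required by the updated signature
        messages=[{"role": "user", "content": "Hey"}],
    )
    print(deployment["litellm_params"]["model"])

asyncio.run(pick_deployment())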
@@ -451,6 +451,15 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
     return applied_guardrails


+def get_dynamic_callbacks(
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
+) -> List:
+    returned_callbacks = litellm.callbacks.copy()
+    if dynamic_callbacks:
+        returned_callbacks.extend(dynamic_callbacks)  # type: ignore
+    return returned_callbacks
+
+
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
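The new `get_dynamic_callbacks` helper copies the global `litellm.callbacks` list and appends any per-request callbacks, so request-scoped callbacks never mutate the module-level list. A standalone restatement of that merge logic for clarity (not the module code itself):

from typing import List, Optional

def merge_callbacks(global_callbacks: List, dynamic_callbacks: Optional[List]) -> List:
    # Copy first so extending with per-request callbacks leaves the
    # global list untouched.
    merged = global_callbacks.copy()
    if dynamic_callbacks:
        merged.extend(dynamic_callbacks)
    return merged

assert merge_callbacks(["langfuse"], ["datadog"]) == ["langfuse", "datadog"]
assert merge_callbacks(["langfuse"], None) == ["langfuse"]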
@@ -475,12 +484,18 @@ def function_setup(  # noqa: PLR0915
     ## LOGGING SETUP
     function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

-    if len(litellm.callbacks) > 0:
-        for callback in litellm.callbacks:
+    ## DYNAMIC CALLBACKS ##
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+        kwargs.pop("callbacks", None)
+    )
+    all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
+
+    if len(all_callbacks) > 0:
+        for callback in all_callbacks:
             # check if callback is a string - e.g. "lago", "openmeter"
             if isinstance(callback, str):
                 callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
-                    callback, internal_usage_cache=None, llm_router=None
+                    callback, internal_usage_cache=None, llm_router=None  # type: ignore
                 )
                 if callback is None or any(
                     isinstance(cb, type(callback))
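Because `function_setup` now pops `callbacks` out of the request kwargs and merges them with the global list, callbacks can be supplied per call rather than only via `litellm.callbacks`. A hedged usage sketch (the callback name and mock response are placeholders; the kwarg handling is as implied by the hunk above):

import litellm

# mock_response keeps this runnable without a real provider key.
response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    mock_response="Hi there!",
    callbacks=["langfuse"],  # request-scoped callback, merged with litellm.callbacks
)
print(response.choices[0].message.content)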
@@ -119,7 +119,7 @@ async def test_router_get_available_deployments(async_test):
     if async_test is True:
         await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
         deployment = await router.async_get_available_deployment(
-            model=model_group, messages=None
+            model=model_group, messages=None, request_kwargs={}
         )
     else:
         router.cache.set_cache(key=cache_key, value=request_count_dict)
@@ -569,7 +569,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
     # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
     for _ in range(1000):
         selected_model = await router.async_get_available_deployment(
-            "gpt-3.5-turbo"
+            "gpt-3.5-turbo", request_kwargs={}
         )
         selected_model_id = selected_model["litellm_params"]["model"]
         selected_model_name = selected_model_id
@@ -26,11 +26,6 @@ import litellm
 from litellm import Router
 from litellm._logging import verbose_logger
-
-verbose_logger.setLevel(logging.DEBUG)
-

-load_dotenv()
-

 @pytest.mark.asyncio()
 async def test_router_free_paid_tier():
@@ -93,6 +88,69 @@ async def test_router_free_paid_tier():
     assert response_extra_info["model_id"] == "very-expensive-model"


+@pytest.mark.asyncio()
+async def test_router_free_paid_tier_embeddings():
+    """
+    Pass list of orgs in 1 model definition,
+    expect a unique deployment for each to be created
+    """
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["free"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-cheap-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["paid"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(1):
+        # this should pick model with id == very-cheap-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["free"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "very-cheap-model"
+
+    for _ in range(5):
+        # this should pick model with id == very-expensive-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["paid"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "very-expensive-model"
+
+
 @pytest.mark.asyncio()
 async def test_default_tagged_deployments():
     """
@@ -377,6 +377,7 @@ async def test_multiple_potential_deployments(sync_mode):
     deployment = await router.async_get_available_deployment(
         model="azure-model",
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        request_kwargs={},
     )

     ## get id ##