Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00

build: merge litellm_dev_03_01_2025_p2

This commit is contained in:
parent 87dd195b51
commit 8ea3d4c046

11 changed files with 124 additions and 24 deletions
@@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
 def langfuse_client_init(
     langfuse_public_key=None,
     langfuse_secret=None,
+    langfuse_secret_key=None,
     langfuse_host=None,
     flush_interval=1,
 ) -> LangfuseClass:

@@ -67,7 +68,10 @@ def langfuse_client_init(
     )

     # Instance variables
-    secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
+    secret_key = (
+        langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
+    )
     public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_host = langfuse_host or os.getenv(
         "LANGFUSE_HOST", "https://cloud.langfuse.com"

@@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogger):
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(

@@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogger):
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
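A minimal sketch of how the new alias could be exercised directly; the import path is an assumption based on the file touched above, and the keys are placeholders:

# Sketch only: the import path is an assumption, and the keys are placeholders.
from litellm.integrations.langfuse.langfuse_prompt_management import langfuse_client_init

langfuse_client = langfuse_client_init(
    langfuse_public_key="pk-lf-...",
    langfuse_secret_key="sk-lf-...",  # new alias; langfuse_secret is still accepted
    langfuse_host="https://cloud.langfuse.com",
)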
File diff suppressed because one or more lines are too long
@@ -1,11 +1,9 @@
 model_list:
-  - model_name: openai-gpt-4o-mini-2024-07-18
+  - model_name: my-langfuse-model
     litellm_params:
-      model: openai/gpt-4o-mini-2024-07-18
-      configurable_clientside_auth_params: ["api_key"]
-      api_key: "my-bad-key"
-  # - model_name: openai-fallback-model
-  #   litellm_params:
-  #     model: openai/gpt-3.5-turbo
-
-
+      model: langfuse/openai-model
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai-model
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
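Assuming a proxy is started with this config on its usual local port (an assumption here), a request against the renamed deployment could look like the sketch below; the base URL and key are placeholders:

# Sketch only; assumes `litellm --config <this file>` is running locally.
import openai

client = openai.OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")
response = client.chat.completions.create(
    model="my-langfuse-model",
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)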
@@ -1003,6 +1003,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
 class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
     success_callback: Optional[List[str]] = []
     failure_callback: Optional[List[str]] = []
+    callbacks: Optional[List[str]] = []
     # for now - only supported for langfuse
     callback_vars: Optional[Dict[str, str]] = {}

@@ -1015,6 +1016,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
         failure_callback = values.get("failure_callback", [])
         if failure_callback is None:
             values.pop("failure_callback", None)
+        callbacks = values.get("callbacks", [])
+        if callbacks is None:
+            values.pop("callbacks", None)

         callback_vars = values.get("callback_vars", {})
         if callback_vars is None:

@@ -1023,6 +1027,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
             return {
                 "success_callback": [],
                 "failure_callback": [],
+                "callbacks": [],
                 "callback_vars": {},
             }
         valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
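A small sketch of the new field on the team callback object; the import path is an assumption (the proxy's _types module) and the values are illustrative:

# Illustrative only; mirrors the fields shown in the hunks above.
from litellm.proxy._types import TeamCallbackMetadata  # assumed import path

team_callbacks = TeamCallbackMetadata(
    callbacks=["langfuse"],
    callback_vars={
        "langfuse_public_key": "pk-lf-...",
        "langfuse_secret_key": "sk-lf-...",
    },
)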
@@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(

         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
-    elif data.callback_type == "success_and_failure":
+    elif (
+        not data.callback_type or data.callback_type == "success_and_failure"
+    ):  # assume 'success_and_failure' = litellm.callbacks
         if team_callback_settings_obj.success_callback is None:
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
+        if team_callback_settings_obj.callbacks is None:
+            team_callback_settings_obj.callbacks = []

         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)

@@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)

+        if data.callback_name not in team_callback_settings_obj.callbacks:
+            team_callback_settings_obj.callbacks.append(data.callback_name)
+
     for var, value in data.callback_vars.items():
         if team_callback_settings_obj.callback_vars is None:
             team_callback_settings_obj.callback_vars = {}
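With the widened condition, an entry whose callback_type is unset now falls into the same branch as "success_and_failure" and is also recorded in the new callbacks list. A rough, self-contained stand-in for the `data` argument (only the attributes the converter reads are mocked; values are illustrative):

# Illustrative stand-in for `data`; the real object is the AddTeamCallback
# pydantic model referenced in the -1003 hunk above.
from types import SimpleNamespace

data = SimpleNamespace(
    callback_name="langfuse",
    callback_type=None,  # falsy, so it is now treated like "success_and_failure"
    callback_vars={
        "langfuse_public_key": "pk-lf-...",
        "langfuse_secret_key": "sk-lf-...",
        "langfuse_host": "https://cloud.langfuse.com",
    },
)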
@@ -1750,6 +1750,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -1863,6 +1864,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )

             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -1961,6 +1963,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
             data = deployment["litellm_params"].copy()

@@ -2036,6 +2039,7 @@ class Router:
             deployment = await self.async_get_available_deployment(
                 model=model,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()

@@ -2080,6 +2084,7 @@ class Router:
                 model=model,
                 messages=messages,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )

             data = deployment["litellm_params"].copy()

@@ -2185,6 +2190,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": prompt}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2283,6 +2289,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "default text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2452,6 +2459,7 @@ class Router:
                 model=model,
                 input=input,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()

@@ -2549,6 +2557,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

@@ -2654,6 +2663,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             metadata_variable_name = _get_router_metadata_variable_name(
                 function_name="_acreate_batch"

@@ -2850,7 +2860,8 @@ class Router:
         ):
             if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
                 deployment = await self.async_get_available_deployment(
-                    model=kwargs["model"]
+                    model=kwargs["model"],
+                    request_kwargs=kwargs,
                 )
                 kwargs["model"] = deployment["litellm_params"]["model"]
             return await original_function(**kwargs)

@@ -5590,10 +5601,10 @@ class Router:
     async def async_get_available_deployment(
         self,
         model: str,
+        request_kwargs: Dict,
         messages: Optional[List[Dict[str, str]]] = None,
         input: Optional[Union[str, List]] = None,
         specific_deployment: Optional[bool] = False,
-        request_kwargs: Optional[Dict] = None,
     ):
         """
         Async implementation of 'get_available_deployments'.
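Since request_kwargs moves from an optional trailing parameter to a required one, call sites (including the tests updated below) now pass it explicitly. A minimal sketch, assuming `router` is an already-configured Router and the call runs inside an async function:

# Sketch only; `router` is assumed to be an already-configured litellm Router.
deployment = await router.async_get_available_deployment(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    request_kwargs={},  # now required; pass the incoming request's kwargs or an empty dict
)
print(deployment["litellm_params"]["model"])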
@@ -451,6 +451,15 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
     return applied_guardrails


+def get_dynamic_callbacks(
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
+) -> List:
+    returned_callbacks = litellm.callbacks.copy()
+    if dynamic_callbacks:
+        returned_callbacks.extend(dynamic_callbacks)  # type: ignore
+    return returned_callbacks
+
+
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.

@@ -475,12 +484,18 @@ def function_setup(  # noqa: PLR0915
     ## LOGGING SETUP
     function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

-    if len(litellm.callbacks) > 0:
-        for callback in litellm.callbacks:
+    ## DYNAMIC CALLBACKS ##
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+        kwargs.pop("callbacks", None)
+    )
+    all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
+
+    if len(all_callbacks) > 0:
+        for callback in all_callbacks:
             # check if callback is a string - e.g. "lago", "openmeter"
             if isinstance(callback, str):
                 callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
-                    callback, internal_usage_cache=None, llm_router=None
+                    callback, internal_usage_cache=None, llm_router=None  # type: ignore
                 )
                 if callback is None or any(
                     isinstance(cb, type(callback))
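With get_dynamic_callbacks, callbacks passed on an individual request are merged with the globally registered litellm.callbacks before logging setup. A minimal sketch using a mock response; the callback names are the examples from the comment in the hunk above:

# Sketch only; uses mock_response to avoid a real API call.
import litellm

litellm.callbacks = ["lago"]  # globally registered callback

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",
    callbacks=["openmeter"],  # request-scoped; merged with litellm.callbacks in function_setup
)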
@@ -119,7 +119,7 @@ async def test_router_get_available_deployments(async_test):
     if async_test is True:
         await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
         deployment = await router.async_get_available_deployment(
-            model=model_group, messages=None
+            model=model_group, messages=None, request_kwargs={}
         )
     else:
         router.cache.set_cache(key=cache_key, value=request_count_dict)
@@ -569,7 +569,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
     # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
     for _ in range(1000):
         selected_model = await router.async_get_available_deployment(
-            "gpt-3.5-turbo"
+            "gpt-3.5-turbo", request_kwargs={}
         )
         selected_model_id = selected_model["litellm_params"]["model"]
         selected_model_name = selected_model_id
@@ -26,11 +26,6 @@ import litellm
 from litellm import Router
-from litellm._logging import verbose_logger
-
-verbose_logger.setLevel(logging.DEBUG)
-
-
 load_dotenv()


 @pytest.mark.asyncio()
 async def test_router_free_paid_tier():
@@ -93,6 +88,69 @@ async def test_router_free_paid_tier():
     assert response_extra_info["model_id"] == "very-expensive-model"


+@pytest.mark.asyncio()
+async def test_router_free_paid_tier_embeddings():
+    """
+    Expect embedding requests tagged "free" to route to the cheap deployment
+    and requests tagged "paid" to route to the expensive deployment.
+    """
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["free"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-cheap-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["paid"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(1):
+        # this should pick the model with id == very-cheap-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["free"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "very-cheap-model"
+
+    for _ in range(5):
+        # this should pick the model with id == very-expensive-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["paid"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "very-expensive-model"
+
+
 @pytest.mark.asyncio()
 async def test_default_tagged_deployments():
     """
@@ -377,6 +377,7 @@ async def test_multiple_potential_deployments(sync_mode):
     deployment = await router.async_get_available_deployment(
         model="azure-model",
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        request_kwargs={},
     )

     ## get id ##