From bd17424c4b8a57a20aa8cb7bc6790f01da1ef6ff Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Fri, 27 Sep 2024 17:54:13 -0700 Subject: [PATCH] LiteLLM Minor Fixes & Improvements (09/26/2024) (#5925) (#5937) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * LiteLLM Minor Fixes & Improvements (09/26/2024) (#5925) * fix(litellm_logging.py): don't initialize prometheus_logger if non premium user Prevents bad error messages in logs Fixes https://github.com/BerriAI/litellm/issues/5897 * Add Support for Custom Providers in Vision and Function Call Utils (#5688) * Add Support for Custom Providers in Vision and Function Call Utils Lookup * Remove parallel function call due to missing model info param * Add Unit Tests for Vision and Function Call Changes * fix-#5920: set header value to string to fix "'int' object has no att… (#5922) * LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880) * LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842) * feat(auth_utils.py): enable admin to allow client-side credentials to be passed Makes it easier for devs to experiment with finetuned fireworks ai models * feat(router.py): allow setting configurable_clientside_auth_params for a model Closes https://github.com/BerriAI/litellm/issues/5843 * build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit Fixes https://github.com/BerriAI/litellm/issues/5850 * fix(azure_ai/): support content list for azure ai Fixes https://github.com/BerriAI/litellm/issues/4237 * fix(litellm_logging.py): always set saved_cache_cost Set to 0 by default * fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing handles calling 405b+ size models * fix(slack_alerting.py): fix error alerting for failed spend tracking Fixes regression with slack alerting error monitoring * fix(vertex_and_google_ai_studio_gemini.py): handle gemini no candidates in streaming chunk error * docs(bedrock.md): add llama3-1 models * test: fix tests * fix(azure_ai/chat): fix transformation for azure ai calls * feat(azure_ai/embed): Add azure ai embeddings support Closes https://github.com/BerriAI/litellm/issues/5861 * fix(azure_ai/embed): enable async embedding * feat(azure_ai/embed): support azure ai multimodal embeddings * fix(azure_ai/embed): support async multi modal embeddings * feat(together_ai/embed): support together ai embedding calls * feat(rerank/main.py): log source documents for rerank endpoints to langfuse improves rerank endpoint logging * fix(langfuse.py): support logging `/audio/speech` input to langfuse * test(test_embedding.py): fix test * test(test_completion_cost.py): fix helper util * fix-#5920: set header value to string to fix "'int' object has no attribute 'encode'" --------- Co-authored-by: Krish Dholakia * Revert "fix-#5920: set header value to string to fix "'int' object has no att…" (#5926) This reverts commit a554ae269504e482cf9ce52fa81fa4116da065ec. 
* build(model_prices_and_context_window.json): add azure ai cohere rerank model pricing Enables cost tracking for azure ai cohere rerank models * fix(litellm_logging.py): fix debug log to be clearer Closes https://github.com/BerriAI/litellm/issues/5909 * test(test_utils.py): fix test name * fix(azure_ai/cost_calculator.py): support cost tracking for azure ai rerank models * fix(azure_ai): fix azure ai base model cost tracking for rerank endpoints * fix(converse_handler.py): support new llama 3-2 models Fixes https://github.com/BerriAI/litellm/issues/5901 * fix(litellm_logging.py): ensure response is redacted for standard message logging Fixes https://github.com/BerriAI/litellm/issues/5890#issuecomment-2378242360 * fix(cost_calculator.py): use 'get_model_info' for cohere rerank cost calculation allows user to set custom cost for model * fix(config.yml): fix docker hub auht * build(config.yml): add docker auth to all tests * fix(db/create_views.py): fix linting error * fix(main.py): fix circular import * fix(azure_ai/__init__.py): fix circular import * fix(main.py): fix import * fix: fix linting errors * test: fix test * fix(proxy_server.py): pass premium user value on startup used for prometheus init --------- Co-authored-by: Cole Murray Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com> * handle streaming for azure ai studio error * [Perf Proxy] parallel request limiter - use one cache update call (#5932) * fix parallel request limiter - use one cache update call * ci/cd run again * run ci/cd again * use docker username password * fix config.yml * fix config * fix config * fix config.yml * ci/cd run again * use correct typing for batch set cache * fix async_set_cache_pipeline * fix only check user id tpm / rpm limits when limits set * fix test_openai_azure_embedding_with_oidc_and_cf * test: fix test * test(test_rerank.py): fix test --------- Co-authored-by: Cole Murray Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com> Co-authored-by: Ishaan Jaff --- .circleci/config.yml | 3 + litellm/cost_calculator.py | 28 +++-- litellm/litellm_core_utils/litellm_logging.py | 41 +++++-- litellm/llms/azure_ai/__init__.py | 3 - litellm/llms/azure_ai/chat/__init__.py | 1 + litellm/llms/azure_ai/cost_calculator.py | 33 ++++++ litellm/llms/azure_ai/embed/__init__.py | 1 + litellm/llms/azure_ai/rerank/handler.py | 101 +++++++++++++++--- litellm/llms/bedrock/chat/converse_handler.py | 13 +-- .../bedrock/chat/converse_transformation.py | 19 ++++ litellm/llms/cohere/cost_calculator.py | 31 ++++++ litellm/llms/cohere/rerank.py | 33 ++++-- litellm/main.py | 6 +- ...odel_prices_and_context_window_backup.json | 66 ++++++++++++ litellm/proxy/_new_secret_config.yaml | 31 ++++-- litellm/proxy/db/create_views.py | 9 +- litellm/proxy/proxy_server.py | 8 +- litellm/proxy/rerank_endpoints/endpoints.py | 3 +- litellm/proxy/utils.py | 5 + litellm/tests/test_bedrock_completion.py | 11 +- litellm/tests/test_completion_cost.py | 35 ++++++ litellm/tests/test_custom_callback_input.py | 1 + litellm/tests/test_prometheus.py | 26 +++-- litellm/tests/test_rerank.py | 4 + litellm/tests/test_utils.py | 39 +++++++ litellm/types/utils.py | 1 + litellm/utils.py | 49 ++++++--- model_prices_and_context_window.json | 66 ++++++++++++ .../test_key_management.py | 1 - 29 files changed, 564 insertions(+), 104 deletions(-) delete mode 100644 litellm/llms/azure_ai/__init__.py create mode 100644 litellm/llms/azure_ai/chat/__init__.py create mode 100644 litellm/llms/azure_ai/embed/__init__.py create mode 
100644 litellm/llms/cohere/cost_calculator.py diff --git a/.circleci/config.yml b/.circleci/config.yml index edbe59113..6014cb8f1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -280,6 +280,9 @@ jobs: installing_litellm_on_python: docker: - image: circleci/python:3.8 + auth: + username: ${DOCKERHUB_USERNAME} + password: ${DOCKERHUB_PASSWORD} working_directory: ~/project steps: diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index d2c5b2cf9..797606121 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -22,6 +22,12 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha from litellm.llms.anthropic.cost_calculation import ( cost_per_token as anthropic_cost_per_token, ) +from litellm.llms.azure_ai.cost_calculator import ( + cost_per_query as azure_ai_rerank_cost_per_query, +) +from litellm.llms.cohere.cost_calculator import ( + cost_per_query as cohere_rerank_cost_per_query, +) from litellm.llms.databricks.cost_calculator import ( cost_per_token as databricks_cost_per_token, ) @@ -85,6 +91,8 @@ def cost_per_token( ### CUSTOM PRICING ### custom_cost_per_token: Optional[CostPerToken] = None, custom_cost_per_second: Optional[float] = None, + ### NUMBER OF QUERIES ### + number_of_queries: Optional[int] = None, ### CALL TYPE ### call_type: Literal[ "embedding", @@ -190,7 +198,6 @@ def cost_per_token( # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models print_verbose(f"Looking up model={model} in model_cost_map") - if custom_llm_provider == "vertex_ai": cost_router = google_cost_router( model=model_without_prefix, @@ -252,12 +259,10 @@ def cost_per_token( ) return prompt_cost, completion_cost elif call_type == "arerank" or call_type == "rerank": - completion_tokens_cost_usd_dollar = rerank_cost( + return rerank_cost( model=model, custom_llm_provider=custom_llm_provider, ) - prompt_tokens_cost_usd_dollar = 0 - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif model in model_cost_ref: print_verbose(f"Success: model={model} in model_cost_map") print_verbose( @@ -793,7 +798,6 @@ def response_cost_calculator( if custom_pricing is True: # override defaults if custom pricing is set base_model = model # base_model defaults to None if not set on model_info - response_cost = completion_cost( completion_response=response_object, call_type=call_type, @@ -808,23 +812,27 @@ def response_cost_calculator( def rerank_cost( model: str, custom_llm_provider: Optional[str], -) -> float: +) -> Tuple[float, float]: """ Returns - float or None: cost of response OR none if error. 
""" + default_num_queries = 1 _, custom_llm_provider, _, _ = litellm.get_llm_provider( model=model, custom_llm_provider=custom_llm_provider ) try: if custom_llm_provider == "cohere": - return 0.002 + return cohere_rerank_cost_per_query( + model=model, num_queries=default_num_queries + ) + elif custom_llm_provider == "azure_ai": + return azure_ai_rerank_cost_per_query( + model=model, num_queries=default_num_queries + ) raise ValueError( f"invalid custom_llm_provider for rerank model: {model}, custom_llm_provider: {custom_llm_provider}" ) except Exception as e: - verbose_logger.exception( - f"litellm.cost_calculator.py::rerank_cost - Exception occurred - {str(e)}" - ) raise e diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 1219feac9..85a2b3cd2 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -31,6 +31,7 @@ from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_custom_logger, redact_message_input_output_from_logging, ) +from litellm.proxy._types import CommonProxyErrors from litellm.rerank_api.types import RerankResponse from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS @@ -97,7 +98,9 @@ try: GenericAPILogger, ) except Exception as e: - verbose_logger.debug(f"Exception import enterprise features {str(e)}") + verbose_logger.debug( + f"[Non-Blocking] Unable to import GenericAPILogger - LiteLLM Enterprise Feature - {str(e)}" + ) _in_memory_loggers: List[Any] = [] @@ -2140,7 +2143,8 @@ def _init_custom_logger_compatible_class( llm_router: Optional[ Any ], # expect litellm.Router, but typing errors due to circular import -) -> CustomLogger: + premium_user: bool = False, +) -> Optional[CustomLogger]: if logging_integration == "lago": for callback in _in_memory_loggers: if isinstance(callback, LagoLogger): @@ -2174,13 +2178,19 @@ def _init_custom_logger_compatible_class( _in_memory_loggers.append(_langsmith_logger) return _langsmith_logger # type: ignore elif logging_integration == "prometheus": - for callback in _in_memory_loggers: - if isinstance(callback, PrometheusLogger): - return callback # type: ignore + if premium_user: + for callback in _in_memory_loggers: + if isinstance(callback, PrometheusLogger): + return callback # type: ignore - _prometheus_logger = PrometheusLogger() - _in_memory_loggers.append(_prometheus_logger) - return _prometheus_logger # type: ignore + _prometheus_logger = PrometheusLogger() + _in_memory_loggers.append(_prometheus_logger) + return _prometheus_logger # type: ignore + else: + verbose_logger.warning( + f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}" + ) + return None elif logging_integration == "datadog": for callback in _in_memory_loggers: if isinstance(callback, DataDogLogger): @@ -2411,6 +2421,7 @@ def get_standard_logging_object_payload( response_obj = init_response_obj else: response_obj = {} + # standardize this function to be used across, s3, dynamoDB, langfuse logging litellm_params = kwargs.get("litellm_params", {}) proxy_server_request = litellm_params.get("proxy_server_request") or {} @@ -2546,6 +2557,16 @@ def get_standard_logging_object_payload( response_cost: float = kwargs.get("response_cost", 0) or 0.0 + if response_obj is not None: + final_response_obj: Optional[Union[dict, str, list]] = response_obj + elif isinstance(init_response_obj, list) or 
isinstance(init_response_obj, str): + final_response_obj = init_response_obj + else: + final_response_obj = None + + if litellm.turn_off_message_logging: + final_response_obj = "redacted-by-litellm" + payload: StandardLoggingPayload = StandardLoggingPayload( id=str(id), call_type=call_type or "", @@ -2569,9 +2590,7 @@ def get_standard_logging_object_payload( model_id=_model_id, requester_ip_address=clean_metadata.get("requester_ip_address", None), messages=kwargs.get("messages"), - response=( # type: ignore - response_obj if len(response_obj.keys()) > 0 else init_response_obj # type: ignore - ), + response=final_response_obj, model_parameters=kwargs.get("optional_params", None), hidden_params=clean_hidden_params, model_map_information=model_cost_information, diff --git a/litellm/llms/azure_ai/__init__.py b/litellm/llms/azure_ai/__init__.py deleted file mode 100644 index c3e4342ec..000000000 --- a/litellm/llms/azure_ai/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .chat.handler import AzureAIChatCompletion -from .embed.handler import AzureAIEmbedding -from .rerank.handler import AzureAIRerank diff --git a/litellm/llms/azure_ai/chat/__init__.py b/litellm/llms/azure_ai/chat/__init__.py new file mode 100644 index 000000000..62378de40 --- /dev/null +++ b/litellm/llms/azure_ai/chat/__init__.py @@ -0,0 +1 @@ +from .handler import AzureAIChatCompletion diff --git a/litellm/llms/azure_ai/cost_calculator.py b/litellm/llms/azure_ai/cost_calculator.py index e69de29bb..00e754214 100644 --- a/litellm/llms/azure_ai/cost_calculator.py +++ b/litellm/llms/azure_ai/cost_calculator.py @@ -0,0 +1,33 @@ +""" +Handles custom cost calculation for Azure AI models. + +Custom cost calculation for Azure AI models only requied for rerank. +""" + +from typing import Tuple + +from litellm.types.utils import Usage +from litellm.utils import get_model_info + + +def cost_per_query(model: str, num_queries: int = 1) -> Tuple[float, float]: + """ + Calculates the cost per query for a given rerank model. 
+ + Input: + - model: str, the model name without provider prefix + + Returns: + Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd + """ + model_info = get_model_info(model=model, custom_llm_provider="azure_ai") + + if ( + "input_cost_per_query" not in model_info + or model_info["input_cost_per_query"] is None + ): + return 0.0, 0.0 + + prompt_cost = model_info["input_cost_per_query"] * num_queries + + return prompt_cost, 0.0 diff --git a/litellm/llms/azure_ai/embed/__init__.py b/litellm/llms/azure_ai/embed/__init__.py new file mode 100644 index 000000000..e0d67acb5 --- /dev/null +++ b/litellm/llms/azure_ai/embed/__init__.py @@ -0,0 +1 @@ +from .handler import AzureAIEmbedding diff --git a/litellm/llms/azure_ai/rerank/handler.py b/litellm/llms/azure_ai/rerank/handler.py index 523448eec..9910086fc 100644 --- a/litellm/llms/azure_ai/rerank/handler.py +++ b/litellm/llms/azure_ai/rerank/handler.py @@ -8,6 +8,57 @@ from litellm.rerank_api.types import RerankResponse class AzureAIRerank(CohereRerank): + + def get_base_model(self, azure_model_group: Optional[str]) -> Optional[str]: + if azure_model_group is None: + return None + if azure_model_group == "offer-cohere-rerank-mul-paygo": + return "azure_ai/cohere-rerank-v3-multilingual" + if azure_model_group == "offer-cohere-rerank-eng-paygo": + return "azure_ai/cohere-rerank-v3-english" + return azure_model_group + + async def async_azure_rerank( + self, + model: str, + api_key: str, + api_base: str, + query: str, + documents: List[Union[str, Dict[str, Any]]], + headers: Optional[dict], + litellm_logging_obj: LiteLLMLoggingObj, + top_n: Optional[int] = None, + rank_fields: Optional[List[str]] = None, + return_documents: Optional[bool] = True, + max_chunks_per_doc: Optional[int] = None, + ): + returned_response: RerankResponse = await super().rerank( # type: ignore + model=model, + api_key=api_key, + api_base=api_base, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + max_chunks_per_doc=max_chunks_per_doc, + _is_async=True, + headers=headers, + litellm_logging_obj=litellm_logging_obj, + ) + + # get base model + additional_headers = ( + returned_response._hidden_params.get("additional_headers") or {} + ) + + base_model = self.get_base_model( + additional_headers.get("llm_provider-azureml-model-group") + ) + returned_response._hidden_params["model"] = base_model + + return returned_response + def rerank( self, model: str, @@ -36,17 +87,39 @@ class AzureAIRerank(CohereRerank): if not api_base_url.path.endswith("/v1/rerank"): api_base = str(api_base_url.copy_with(path="/v1/rerank")) - return super().rerank( - model=model, - api_key=api_key, - api_base=api_base, - query=query, - documents=documents, - top_n=top_n, - rank_fields=rank_fields, - return_documents=return_documents, - max_chunks_per_doc=max_chunks_per_doc, - _is_async=_is_async, - headers=headers, - litellm_logging_obj=litellm_logging_obj, - ) + if _is_async: + return self.async_azure_rerank( # type: ignore + model=model, + api_key=api_key, + api_base=api_base, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + max_chunks_per_doc=max_chunks_per_doc, + headers=headers, + litellm_logging_obj=litellm_logging_obj, + ) + else: + returned_response = super().rerank( + model=model, + api_key=api_key, + api_base=api_base, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + 
max_chunks_per_doc=max_chunks_per_doc, + _is_async=_is_async, + headers=headers, + litellm_logging_obj=litellm_logging_obj, + ) + + # get base model + base_model = self.get_base_model( + returned_response._hidden_params.get("llm_provider-azureml-model-group") + ) + returned_response._hidden_params["model"] = base_model + return returned_response diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py index e3b441295..6d4f9b66c 100644 --- a/litellm/llms/bedrock/chat/converse_handler.py +++ b/litellm/llms/bedrock/chat/converse_handler.py @@ -20,17 +20,9 @@ from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_ca BEDROCK_CONVERSE_MODELS = [ "anthropic.claude-3-5-sonnet-20240620-v1:0", - "us.anthropic.claude-3-5-sonnet-20240620-v1:0", - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0", "anthropic.claude-3-opus-20240229-v1:0", - "us.anthropic.claude-3-opus-20240229-v1:0", - "eu.anthropic.claude-3-opus-20240229-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", - "us.anthropic.claude-3-sonnet-20240229-v1:0", - "eu.anthropic.claude-3-sonnet-20240229-v1:0", "anthropic.claude-3-haiku-20240307-v1:0", - "us.anthropic.claude-3-haiku-20240307-v1:0", - "eu.anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-v2", "anthropic.claude-v2:1", "anthropic.claude-v1", @@ -43,6 +35,11 @@ BEDROCK_CONVERSE_MODELS = [ "meta.llama3-1-405b-instruct-v1:0", "meta.llama3-70b-instruct-v1:0", "mistral.mistral-large-2407-v1:0", + "meta.llama3-2-1b-instruct-v1:0", + "meta.llama3-2-3b-instruct-v1:0", + "meta.llama3-2-11b-instruct-v1:0", + "meta.llama3-2-90b-instruct-v1:0", + "meta.llama3-2-405b-instruct-v1:0", ] diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 86f1c102d..8229f6a58 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -430,3 +430,22 @@ class AmazonConverseConfig: setattr(model_response, "trace", completion_response["trace"]) return model_response + + def _supported_cross_region_inference_region(self) -> List[str]: + """ + Abbreviations of regions AWS Bedrock supports for cross region inference + """ + return ["us", "eu"] + + def _get_base_model(self, model: str) -> str: + """ + Get the base model from the given model name. + + Handle model names like - "us.meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1" + AND "meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1" + """ + + potential_region = model.split(".", 1)[0] + if potential_region in self._supported_cross_region_inference_region(): + return model.split(".", 1)[1] + return model diff --git a/litellm/llms/cohere/cost_calculator.py b/litellm/llms/cohere/cost_calculator.py new file mode 100644 index 000000000..224dd5cfa --- /dev/null +++ b/litellm/llms/cohere/cost_calculator.py @@ -0,0 +1,31 @@ +""" +Custom cost calculator for Cohere rerank models +""" + +from typing import Tuple + +from litellm.utils import get_model_info + + +def cost_per_query(model: str, num_queries: int = 1) -> Tuple[float, float]: + """ + Calculates the cost per query for a given rerank model. 
+ + Input: + - model: str, the model name without provider prefix + + Returns: + Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd + """ + + model_info = get_model_info(model=model, custom_llm_provider="cohere") + + if ( + "input_cost_per_query" not in model_info + or model_info["input_cost_per_query"] is None + ): + return 0.0, 0.0 + + prompt_cost = model_info["input_cost_per_query"] * num_queries + + return prompt_cost, 0.0 diff --git a/litellm/llms/cohere/rerank.py b/litellm/llms/cohere/rerank.py index 5332be00c..a41c3dfb2 100644 --- a/litellm/llms/cohere/rerank.py +++ b/litellm/llms/cohere/rerank.py @@ -6,9 +6,6 @@ LiteLLM supports the re rank API format, no paramter transformation occurs from typing import Any, Dict, List, Optional, Union -import httpx -from pydantic import BaseModel - import litellm from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.base import BaseLLM @@ -65,7 +62,6 @@ class CohereRerank(BaseLLM): ) request_data_dict = request_data.dict(exclude_none=True) - ## LOGGING litellm_logging_obj.pre_call( input=request_data_dict, @@ -78,7 +74,7 @@ class CohereRerank(BaseLLM): ) if _is_async: - return self.async_rerank(request_data_dict=request_data_dict, api_key=api_key, api_base=api_base, headers=headers) # type: ignore # Call async method + return self.async_rerank(request_data=request_data, api_key=api_key, api_base=api_base, headers=headers) # type: ignore # Call async method client = _get_httpx_client() response = client.post( @@ -87,15 +83,26 @@ class CohereRerank(BaseLLM): json=request_data_dict, ) - return RerankResponse(**response.json()) + returned_response = RerankResponse(**response.json()) + + _response_headers = response.headers + + llm_response_headers = { + "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items() + } + returned_response._hidden_params["additional_headers"] = llm_response_headers + + return returned_response async def async_rerank( self, - request_data_dict: Dict[str, Any], + request_data: RerankRequest, api_key: str, api_base: str, headers: dict, ) -> RerankResponse: + request_data_dict = request_data.dict(exclude_none=True) + client = get_async_httpx_client(llm_provider=litellm.LlmProviders.COHERE) response = await client.post( @@ -104,4 +111,14 @@ class CohereRerank(BaseLLM): json=request_data_dict, ) - return RerankResponse(**response.json()) + returned_response = RerankResponse(**response.json()) + + _response_headers = dict(response.headers) + + llm_response_headers = { + "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items() + } + returned_response._hidden_params["additional_headers"] = llm_response_headers + returned_response._hidden_params["model"] = request_data.model + + return returned_response diff --git a/litellm/main.py b/litellm/main.py index 6ed5534fb..ff9ca81c1 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -83,7 +83,8 @@ from .llms import ( from .llms.AI21 import completion as ai21 from .llms.anthropic.chat import AnthropicChatCompletion from .llms.anthropic.completion import AnthropicTextCompletion -from .llms.azure_ai import AzureAIChatCompletion, AzureAIEmbedding +from .llms.azure_ai.chat import AzureAIChatCompletion +from .llms.azure_ai.embed import AzureAIEmbedding from .llms.azure_text import AzureTextCompletion from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params @@ -2411,8 +2412,9 @@ def completion( 
aws_bedrock_client.meta.region_name ) - if model in litellm.BEDROCK_CONVERSE_MODELS: + base_model = litellm.AmazonConverseConfig()._get_base_model(model) + if base_model in litellm.BEDROCK_CONVERSE_MODELS: response = bedrock_converse_chat_completion.completion( model=model, messages=messages, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 2831f1a5c..48b2a9322 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -990,6 +990,28 @@ "mode": "chat", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice" }, + "azure_ai/cohere-rerank-v3-multilingual": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, + "azure_ai/cohere-rerank-v3-english": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, "azure_ai/Cohere-embed-v3-english": { "max_tokens": 512, "max_input_tokens": 512, @@ -3114,6 +3136,50 @@ "litellm_provider": "cohere", "mode": "completion" }, + "rerank-english-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-english-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, "embed-english-v3.0": { "max_tokens": 512, "max_input_tokens": 512, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index c52972be0..a219b7997 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -11,7 +11,11 @@ model_list: api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001 vertex_project: "adroit-crow-413218" vertex_location: "us-central1" - + - model_name: fake-azure-endpoint + litellm_params: + model: openai/429 + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app - model_name: fake-openai-endpoint litellm_params: model: gpt-3.5-turbo @@ -23,6 +27,11 @@ model_list: litellm_params: model: cohere/rerank-english-v3.0 api_key: 
os.environ/COHERE_API_KEY + - model_name: azure-rerank-english-v3.0 + litellm_params: + model: azure_ai/rerank-english-v3.0 + api_base: os.environ/AZURE_AI_COHERE_API_BASE + api_key: os.environ/AZURE_AI_COHERE_API_KEY - model_name: "databricks/*" litellm_params: model: "databricks/*" @@ -43,9 +52,19 @@ model_list: model: "vertex_ai/gemini-flash-experimental" litellm_settings: - success_callback: ["langfuse", "prometheus"] - failure_callback: ["prometheus"] + callbacks: ["prometheus"] + redact_user_api_key_info: true + + default_team_settings: + - team_id: "09ae376d-f6c8-42cd-88be-59717135684d" # team 1 + success_callbacks: ["langfuse"] + langfuse_public_key: "pk-lf-1" + langfuse_secret: "sk-lf-1" + langfuse_host: "" + + - team_id: "e5db79db-d623-4a5b-afd5-162be56074df" # team2 + success_callback: ["langfuse"] + langfuse_public_key: "pk-lf-2" + langfuse_secret: "sk-lf-2" + langfuse_host: "" -general_settings: - proxy_budget_rescheduler_min_time: 1 - proxy_budget_rescheduler_max_time: 1 \ No newline at end of file diff --git a/litellm/proxy/db/create_views.py b/litellm/proxy/db/create_views.py index a83587478..bbf1c5dfb 100644 --- a/litellm/proxy/db/create_views.py +++ b/litellm/proxy/db/create_views.py @@ -1,13 +1,8 @@ -from typing import TYPE_CHECKING, Any +from typing import Any from litellm import verbose_logger -if TYPE_CHECKING: - from prisma import Prisma - - _db = Prisma -else: - _db = Any +_db = Any async def create_missing_views(db: _db): diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 4f09518d1..5848fe451 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -505,7 +505,9 @@ prompt_injection_detection_obj: Optional[_OPTIONAL_PromptInjectionDetection] = N store_model_in_db: bool = False open_telemetry_logger = None ### INITIALIZE GLOBAL LOGGING OBJECT ### -proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) +proxy_logging_obj = ProxyLogging( + user_api_key_cache=user_api_key_cache, premium_user=premium_user +) ### REDIS QUEUE ### async_result = None celery_app_conn = None @@ -567,7 +569,9 @@ def get_custom_headers( try: return { - key: value for key, value in headers.items() if value not in exclude_values + key: str(value) + for key, value in headers.items() + if value not in exclude_values } except Exception as e: verbose_proxy_logger.error(f"Error setting custom headers: {e}") diff --git a/litellm/proxy/rerank_endpoints/endpoints.py b/litellm/proxy/rerank_endpoints/endpoints.py index 6bc6dc948..bc09d7fc0 100644 --- a/litellm/proxy/rerank_endpoints/endpoints.py +++ b/litellm/proxy/rerank_endpoints/endpoints.py @@ -86,7 +86,7 @@ async def rerank( model_id = hidden_params.get("model_id", None) or "" cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" - + additional_headers = hidden_params.get("additional_headers", None) or {} fastapi_response.headers.update( get_custom_headers( user_api_key_dict=user_api_key_dict, @@ -96,6 +96,7 @@ async def rerank( version=version, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), request_data=data, + **additional_headers, ) ) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 8c61783a2..771fa7635 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -312,6 +312,7 @@ class ProxyLogging: def __init__( self, user_api_key_cache: DualCache, + premium_user: bool = False, ): ## INITIALIZE LITELLM CALLBACKS ## self.call_details: dict = {} @@ -334,6 +335,7 @@ class 
ProxyLogging: alert_types=self.alert_types, internal_usage_cache=self.internal_usage_cache.dual_cache, ) + self.premium_user = premium_user def update_values( self, @@ -394,7 +396,10 @@ class ProxyLogging: callback, internal_usage_cache=self.internal_usage_cache.dual_cache, llm_router=llm_router, + premium_user=self.premium_user, ) + if callback is None: + continue if callback not in litellm.input_callback: litellm.input_callback.append(callback) # type: ignore if callback not in litellm.success_callback: diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index e786e4fc8..64934d381 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -1226,10 +1226,17 @@ def test_not_found_error(): ) -def test_bedrock_cross_region_inference(): +@pytest.mark.parametrize( + "model", + [ + # "bedrock/us.anthropic.claude-3-haiku-20240307-v1:0", + "bedrock/us.meta.llama3-2-11b-instruct-v1:0", + ], +) +def test_bedrock_cross_region_inference(model): litellm.set_verbose = True response = completion( - model="bedrock/us.anthropic.claude-3-haiku-20240307-v1:0", + model=model, messages=messages, max_tokens=10, temperature=0.1, diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index b5db9a77e..ecfaba3ae 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -1328,6 +1328,41 @@ def test_completion_cost_vertex_llama3(): assert cost == 0 +@pytest.mark.parametrize( + "model", + [ + "cohere/rerank-english-v3.0", + "azure_ai/cohere-rerank-v3-english", + ], +) +def test_completion_cost_azure_ai_rerank(model): + from litellm import RerankResponse, rerank + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + response = RerankResponse( + id="b01dbf2e-63c8-4981-9e69-32241da559ed", + results=[ + { + "document": { + "id": "1", + "text": "Paris is the capital of France.", + }, + "index": 0, + "relevance_score": 0.990732, + }, + ], + meta={}, + ) + print("response", response) + model = model + cost = completion_cost( + model=model, completion_response=response, call_type="arerank" + ) + assert cost > 0 + + def test_together_ai_embedding_completion_cost(): from litellm.utils import Choices, EmbeddingResponse, Message, ModelResponse, Usage diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index e6d602e72..504c881fa 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -1254,6 +1254,7 @@ def test_standard_logging_payload(model, turn_off_message_logging): ] if turn_off_message_logging: assert "redacted-by-litellm" == slobject["messages"][0]["content"] + assert "redacted-by-litellm" == slobject["response"] @pytest.mark.skip(reason="Works locally. 
Flaky on ci/cd") diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py index a7f9ef388..2f0e4a19e 100644 --- a/litellm/tests/test_prometheus.py +++ b/litellm/tests/test_prometheus.py @@ -23,12 +23,16 @@ litellm.set_verbose = True import time +@pytest.mark.skip(reason="duplicate test of logging with callbacks") @pytest.mark.asyncio() async def test_async_prometheus_success_logging(): + from litellm.integrations.prometheus import PrometheusLogger + + pl = PrometheusLogger() run_id = str(uuid.uuid4()) + litellm.set_verbose = True - litellm.success_callback = ["prometheus"] - litellm.failure_callback = ["prometheus"] + litellm.callbacks = [pl] response = await litellm.acompletion( model="claude-instant-1.2", @@ -54,12 +58,7 @@ async def test_async_prometheus_success_logging(): await asyncio.sleep(3) # get prometheus logger - from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers - - for callback in _in_memory_loggers: - if isinstance(callback, PrometheusLogger): - test_prometheus_logger = callback - + test_prometheus_logger = pl print("done with success request") print( @@ -83,12 +82,15 @@ async def test_async_prometheus_success_logging(): @pytest.mark.asyncio() async def test_async_prometheus_success_logging_with_callbacks(): + + pl = PrometheusLogger() + run_id = str(uuid.uuid4()) litellm.set_verbose = True litellm.success_callback = [] litellm.failure_callback = [] - litellm.callbacks = ["prometheus"] + litellm.callbacks = [pl] # Get initial metric values initial_metrics = {} @@ -120,11 +122,7 @@ async def test_async_prometheus_success_logging_with_callbacks(): await asyncio.sleep(3) # get prometheus logger - from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers - - for callback in _in_memory_loggers: - if isinstance(callback, PrometheusLogger): - test_prometheus_logger = callback + test_prometheus_logger = pl print("done with success request") diff --git a/litellm/tests/test_rerank.py b/litellm/tests/test_rerank.py index c46f536a9..c5ed1efe5 100644 --- a/litellm/tests/test_rerank.py +++ b/litellm/tests/test_rerank.py @@ -185,6 +185,7 @@ async def test_rerank_custom_api_base(): } mock_response.json = return_val + mock_response.headers = {"key": "value"} mock_response.status_code = 200 expected_payload = { @@ -238,6 +239,9 @@ class TestLogger(CustomLogger): @pytest.mark.asyncio() async def test_rerank_custom_callbacks(): + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + custom_logger = TestLogger() litellm.callbacks = [custom_logger] response = await litellm.arerank( diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py index 9a03c857b..802d63993 100644 --- a/litellm/tests/test_utils.py +++ b/litellm/tests/test_utils.py @@ -763,6 +763,45 @@ def test_supports_response_schema(model, expected_bool): assert expected_bool == response +@pytest.mark.parametrize( + "model, expected_bool", + [ + ("gpt-3.5-turbo", True), + ("gpt-4", True), + ("command-nightly", False), + ("gemini-pro", True), + ], +) +def test_supports_function_calling_v2(model, expected_bool): + """ + Unit test for 'supports_function_calling' helper function. 
+ """ + from litellm.utils import supports_function_calling + + response = supports_function_calling(model=model, custom_llm_provider=None) + assert expected_bool == response + + +@pytest.mark.parametrize( + "model, expected_bool", + [ + ("gpt-4-vision-preview", True), + ("gpt-3.5-turbo", False), + ("claude-3-opus-20240229", True), + ("gemini-pro-vision", True), + ("command-nightly", False), + ], +) +def test_supports_vision(model, expected_bool): + """ + Unit test for 'supports_vision' helper function. + """ + from litellm.utils import supports_vision + + response = supports_vision(model=model, custom_llm_provider=None) + assert expected_bool == response + + def test_usage_object_null_tokens(): """ Unit test. diff --git a/litellm/types/utils.py b/litellm/types/utils.py index ab1ffe101..3dc644030 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -59,6 +59,7 @@ class ModelInfo(TypedDict, total=False): input_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models + input_cost_per_query: Optional[float] # only for rerank models input_cost_per_image: Optional[float] # only for vertex ai models input_cost_per_audio_per_second: Optional[float] # only for vertex ai models input_cost_per_video_per_second: Optional[float] # only for vertex ai models diff --git a/litellm/utils.py b/litellm/utils.py index cce70c6f8..fe3ef51f1 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -367,7 +367,7 @@ def function_setup( callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore callback, internal_usage_cache=None, llm_router=None ) - if any( + if callback is None or any( isinstance(cb, type(callback)) for cb in litellm._async_success_callback ): # don't double add a callback @@ -431,7 +431,7 @@ def function_setup( ) # don't double add a callback - if not any( + if callback_class is not None and not any( isinstance(cb, type(callback_class)) for cb in litellm.callbacks ): litellm.callbacks.append(callback_class) # type: ignore @@ -2148,50 +2148,67 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> return False -def supports_function_calling(model: str) -> bool: +def supports_function_calling( + model: str, custom_llm_provider: Optional[str] = None +) -> bool: """ Check if the given model supports function calling and return a boolean value. Parameters: model (str): The model name to be checked. + custom_llm_provider (Optional[str]): The provider to be checked. Returns: bool: True if the model supports function calling, False otherwise. Raises: - Exception: If the given model is not found in model_prices_and_context_window.json. + Exception: If the given model is not found or there's an error in retrieval. """ + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + model_info = litellm.get_model_info( + model=model, custom_llm_provider=custom_llm_provider + ) - if model in litellm.model_cost: - model_info = litellm.model_cost[model] if model_info.get("supports_function_calling", False) is True: return True return False - else: + except Exception as e: raise Exception( - f"Model not supports function calling. You passed model={model}." + f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. 
Error: {str(e)}" ) -def supports_vision(model: str): +def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool: """ Check if the given model supports vision and return a boolean value. Parameters: model (str): The model name to be checked. + custom_llm_provider (Optional[str]): The provider to be checked. Returns: bool: True if the model supports vision, False otherwise. - - Raises: - Exception: If the given model is not found in model_prices_and_context_window.json. """ - if model in litellm.model_cost: - model_info = litellm.model_cost[model] + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + model_info = litellm.get_model_info( + model=model, custom_llm_provider=custom_llm_provider + ) + if model_info.get("supports_vision", False) is True: return True return False - else: + except Exception as e: + verbose_logger.error( + f"Model not found or error in checking vision support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" + ) return False @@ -4755,6 +4772,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod input_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models + input_cost_per_query: Optional[float] # only for rerank models input_cost_per_image: Optional[float] # only for vertex ai models input_cost_per_audio_per_second: Optional[float] # only for vertex ai models input_cost_per_video_per_second: Optional[float] # only for vertex ai models @@ -5000,6 +5018,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod input_cost_per_token_above_128k_tokens=_model_info.get( "input_cost_per_token_above_128k_tokens", None ), + input_cost_per_query=_model_info.get("input_cost_per_query", None), output_cost_per_token=_output_cost_per_token, output_cost_per_character=_model_info.get( "output_cost_per_character", None diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 2831f1a5c..48b2a9322 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -990,6 +990,28 @@ "mode": "chat", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice" }, + "azure_ai/cohere-rerank-v3-multilingual": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, + "azure_ai/cohere-rerank-v3-english": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, "azure_ai/Cohere-embed-v3-english": { "max_tokens": 512, "max_input_tokens": 512, @@ -3114,6 +3136,50 @@ "litellm_provider": "cohere", "mode": "completion" }, + "rerank-english-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 
2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-english-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, "embed-english-v3.0": { "max_tokens": 512, "max_input_tokens": 512, diff --git a/tests/proxy_admin_ui_tests/test_key_management.py b/tests/proxy_admin_ui_tests/test_key_management.py index e46b795ff..3f44c12a3 100644 --- a/tests/proxy_admin_ui_tests/test_key_management.py +++ b/tests/proxy_admin_ui_tests/test_key_management.py @@ -125,7 +125,6 @@ async def test_regenerate_api_key(prisma_client): setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") await litellm.proxy.proxy_server.prisma_client.connect() - import uuid # generate new key key_alias = f"test_alias_regenerate_key-{uuid.uuid4()}"