forked from phoenix/litellm-mirror
* LiteLLM Minor Fixes & Improvements (09/26/2024) (#5925)
* fix(litellm_logging.py): don't initialize prometheus_logger if non premium user
Prevents bad error messages in logs
Fixes https://github.com/BerriAI/litellm/issues/5897
* Add Support for Custom Providers in Vision and Function Call Utils (#5688)
* Add Support for Custom Providers in Vision and Function Call Utils Lookup
* Remove parallel function call due to missing model info param
* Add Unit Tests for Vision and Function Call Changes
* fix-#5920: set header value to string to fix "'int' object has no att… (#5922)
* LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)
* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)
* feat(auth_utils.py): enable admin to allow client-side credentials to be passed
Makes it easier for devs to experiment with finetuned fireworks ai models
* feat(router.py): allow setting configurable_clientside_auth_params for a model
Closes https://github.com/BerriAI/litellm/issues/5843
* build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit
Fixes https://github.com/BerriAI/litellm/issues/5850
* fix(azure_ai/): support content list for azure ai
Fixes https://github.com/BerriAI/litellm/issues/4237
* fix(litellm_logging.py): always set saved_cache_cost
Set to 0 by default
* fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing
handles calling 405b+ size models
* fix(slack_alerting.py): fix error alerting for failed spend tracking
Fixes regression with slack alerting error monitoring
* fix(vertex_and_google_ai_studio_gemini.py): handle gemini no candidates in streaming chunk error
* docs(bedrock.md): add llama3-1 models
* test: fix tests
* fix(azure_ai/chat): fix transformation for azure ai calls
* feat(azure_ai/embed): Add azure ai embeddings support
Closes https://github.com/BerriAI/litellm/issues/5861
* fix(azure_ai/embed): enable async embedding
* feat(azure_ai/embed): support azure ai multimodal embeddings
* fix(azure_ai/embed): support async multi modal embeddings
* feat(together_ai/embed): support together ai embedding calls
* feat(rerank/main.py): log source documents for rerank endpoints to langfuse
improves rerank endpoint logging
* fix(langfuse.py): support logging `/audio/speech` input to langfuse
* test(test_embedding.py): fix test
* test(test_completion_cost.py): fix helper util
* fix-#5920: set header value to string to fix "'int' object has no attribute 'encode'"
---------
Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
* Revert "fix-#5920: set header value to string to fix "'int' object has no att…" (#5926)
This reverts commit a554ae2695.
* build(model_prices_and_context_window.json): add azure ai cohere rerank model pricing
Enables cost tracking for azure ai cohere rerank models
* fix(litellm_logging.py): fix debug log to be clearer
Closes https://github.com/BerriAI/litellm/issues/5909
* test(test_utils.py): fix test name
* fix(azure_ai/cost_calculator.py): support cost tracking for azure ai rerank models
* fix(azure_ai): fix azure ai base model cost tracking for rerank endpoints
* fix(converse_handler.py): support new llama 3-2 models
Fixes https://github.com/BerriAI/litellm/issues/5901
* fix(litellm_logging.py): ensure response is redacted for standard message logging
Fixes https://github.com/BerriAI/litellm/issues/5890#issuecomment-2378242360
* fix(cost_calculator.py): use 'get_model_info' for cohere rerank cost calculation
allows user to set custom cost for model
* fix(config.yml): fix docker hub auth
* build(config.yml): add docker auth to all tests
* fix(db/create_views.py): fix linting error
* fix(main.py): fix circular import
* fix(azure_ai/__init__.py): fix circular import
* fix(main.py): fix import
* fix: fix linting errors
* test: fix test
* fix(proxy_server.py): pass premium user value on startup
used for prometheus init
---------
Co-authored-by: Cole Murray <colemurray.cs@gmail.com>
Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com>
* handle streaming for azure ai studio error
* [Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
* test: fix test
* test(test_rerank.py): fix test
---------
Co-authored-by: Cole Murray <colemurray.cs@gmail.com>
Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
parent 789ce6b747
commit bd17424c4b
29 changed files with 564 additions and 104 deletions
@@ -280,6 +280,9 @@ jobs:
  installing_litellm_on_python:
    docker:
      - image: circleci/python:3.8
        auth:
          username: ${DOCKERHUB_USERNAME}
          password: ${DOCKERHUB_PASSWORD}
    working_directory: ~/project

    steps:
@@ -22,6 +22,12 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha
from litellm.llms.anthropic.cost_calculation import (
    cost_per_token as anthropic_cost_per_token,
)
from litellm.llms.azure_ai.cost_calculator import (
    cost_per_query as azure_ai_rerank_cost_per_query,
)
from litellm.llms.cohere.cost_calculator import (
    cost_per_query as cohere_rerank_cost_per_query,
)
from litellm.llms.databricks.cost_calculator import (
    cost_per_token as databricks_cost_per_token,
)
@@ -85,6 +91,8 @@ def cost_per_token(
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
    ### NUMBER OF QUERIES ###
    number_of_queries: Optional[int] = None,
    ### CALL TYPE ###
    call_type: Literal[
        "embedding",
@@ -190,7 +198,6 @@ def cost_per_token(

    # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
    print_verbose(f"Looking up model={model} in model_cost_map")

    if custom_llm_provider == "vertex_ai":
        cost_router = google_cost_router(
            model=model_without_prefix,
@@ -252,12 +259,10 @@ def cost_per_token(
        )
        return prompt_cost, completion_cost
    elif call_type == "arerank" or call_type == "rerank":
        completion_tokens_cost_usd_dollar = rerank_cost(
        return rerank_cost(
            model=model,
            custom_llm_provider=custom_llm_provider,
        )
        prompt_tokens_cost_usd_dollar = 0
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif model in model_cost_ref:
        print_verbose(f"Success: model={model} in model_cost_map")
        print_verbose(
@@ -793,7 +798,6 @@ def response_cost_calculator(
        if custom_pricing is True:  # override defaults if custom pricing is set
            base_model = model
            # base_model defaults to None if not set on model_info

        response_cost = completion_cost(
            completion_response=response_object,
            call_type=call_type,
@@ -808,23 +812,27 @@
def rerank_cost(
    model: str,
    custom_llm_provider: Optional[str],
) -> float:
) -> Tuple[float, float]:
    """
    Returns
    - float or None: cost of response OR none if error.
    """
    default_num_queries = 1
    _, custom_llm_provider, _, _ = litellm.get_llm_provider(
        model=model, custom_llm_provider=custom_llm_provider
    )

    try:
        if custom_llm_provider == "cohere":
            return 0.002
            return cohere_rerank_cost_per_query(
                model=model, num_queries=default_num_queries
            )
        elif custom_llm_provider == "azure_ai":
            return azure_ai_rerank_cost_per_query(
                model=model, num_queries=default_num_queries
            )
        raise ValueError(
            f"invalid custom_llm_provider for rerank model: {model}, custom_llm_provider: {custom_llm_provider}"
        )
    except Exception as e:
        verbose_logger.exception(
            f"litellm.cost_calculator.py::rerank_cost - Exception occurred - {str(e)}"
        )
        raise e
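The reworked helper changes the return contract from a bare float to a (prompt_cost, completion_cost) pair. A minimal usage sketch, assuming the module path litellm/cost_calculator.py named in the exception message above and a cohere pricing entry that defines input_cost_per_query:

# Hedged usage sketch: rerank_cost now yields a cost tuple instead of a float.
from litellm.cost_calculator import rerank_cost

prompt_cost, completion_cost = rerank_cost(
    model="rerank-english-v3.0", custom_llm_provider="cohere"
)
# completion_cost is always 0.0 for rerank; per-query pricing lands on the prompt side.
print(prompt_cost + completion_cost)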
@@ -31,6 +31,7 @@ from litellm.litellm_core_utils.redact_messages import (
    redact_message_input_output_from_custom_logger,
    redact_message_input_output_from_logging,
)
from litellm.proxy._types import CommonProxyErrors
from litellm.rerank_api.types import RerankResponse
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -97,7 +98,9 @@ try:
        GenericAPILogger,
    )
except Exception as e:
    verbose_logger.debug(f"Exception import enterprise features {str(e)}")
    verbose_logger.debug(
        f"[Non-Blocking] Unable to import GenericAPILogger - LiteLLM Enterprise Feature - {str(e)}"
    )

_in_memory_loggers: List[Any] = []

@@ -2140,7 +2143,8 @@ def _init_custom_logger_compatible_class(
    llm_router: Optional[
        Any
    ],  # expect litellm.Router, but typing errors due to circular import
) -> CustomLogger:
    premium_user: bool = False,
) -> Optional[CustomLogger]:
    if logging_integration == "lago":
        for callback in _in_memory_loggers:
            if isinstance(callback, LagoLogger):
@@ -2174,13 +2178,19 @@ def _init_custom_logger_compatible_class(
        _in_memory_loggers.append(_langsmith_logger)
        return _langsmith_logger  # type: ignore
    elif logging_integration == "prometheus":
        for callback in _in_memory_loggers:
            if isinstance(callback, PrometheusLogger):
                return callback  # type: ignore
        if premium_user:
            for callback in _in_memory_loggers:
                if isinstance(callback, PrometheusLogger):
                    return callback  # type: ignore

            _prometheus_logger = PrometheusLogger()
            _in_memory_loggers.append(_prometheus_logger)
            return _prometheus_logger  # type: ignore
        _prometheus_logger = PrometheusLogger()
        _in_memory_loggers.append(_prometheus_logger)
        return _prometheus_logger  # type: ignore
        else:
            verbose_logger.warning(
                f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
            )
            return None
    elif logging_integration == "datadog":
        for callback in _in_memory_loggers:
            if isinstance(callback, DataDogLogger):
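Because the initializer can now return None (for example, requesting "prometheus" without a premium license), callers are expected to guard the result. A hedged sketch of the calling convention; the keyword names come from the hunks above and the positional integration name is an assumption:

# Hedged sketch: a None return means the integration was refused (non-premium
# prometheus), so nothing should be registered as a callback.
callback = _init_custom_logger_compatible_class(
    "prometheus",
    internal_usage_cache=None,
    llm_router=None,
    premium_user=False,
)
if callback is not None:
    litellm.callbacks.append(callback)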
@@ -2411,6 +2421,7 @@ def get_standard_logging_object_payload(
        response_obj = init_response_obj
    else:
        response_obj = {}

    # standardize this function to be used across, s3, dynamoDB, langfuse logging
    litellm_params = kwargs.get("litellm_params", {})
    proxy_server_request = litellm_params.get("proxy_server_request") or {}
@@ -2546,6 +2557,16 @@ def get_standard_logging_object_payload(

    response_cost: float = kwargs.get("response_cost", 0) or 0.0

    if response_obj is not None:
        final_response_obj: Optional[Union[dict, str, list]] = response_obj
    elif isinstance(init_response_obj, list) or isinstance(init_response_obj, str):
        final_response_obj = init_response_obj
    else:
        final_response_obj = None

    if litellm.turn_off_message_logging:
        final_response_obj = "redacted-by-litellm"

    payload: StandardLoggingPayload = StandardLoggingPayload(
        id=str(id),
        call_type=call_type or "",
@@ -2569,9 +2590,7 @@ def get_standard_logging_object_payload(
        model_id=_model_id,
        requester_ip_address=clean_metadata.get("requester_ip_address", None),
        messages=kwargs.get("messages"),
        response=(  # type: ignore
            response_obj if len(response_obj.keys()) > 0 else init_response_obj  # type: ignore
        ),
        response=final_response_obj,
        model_parameters=kwargs.get("optional_params", None),
        hidden_params=clean_hidden_params,
        model_map_information=model_cost_information,
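The net effect of the final_response_obj handling above: with message logging turned off, the standard logging payload carries a fixed marker instead of the model output. A small illustration of the contract, mirroring the assertion added in test_standard_logging_payload further down; this is not the internal implementation:

import litellm

litellm.turn_off_message_logging = True
# After any completion, the payload built by get_standard_logging_object_payload
# exposes the redaction marker rather than the raw response:
# payload["response"] == "redacted-by-litellm"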
@@ -1,3 +0,0 @@
from .chat.handler import AzureAIChatCompletion
from .embed.handler import AzureAIEmbedding
from .rerank.handler import AzureAIRerank

litellm/llms/azure_ai/chat/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .handler import AzureAIChatCompletion
@@ -0,0 +1,33 @@
"""
Handles custom cost calculation for Azure AI models.

Custom cost calculation for Azure AI models only required for rerank.
"""

from typing import Tuple

from litellm.types.utils import Usage
from litellm.utils import get_model_info


def cost_per_query(model: str, num_queries: int = 1) -> Tuple[float, float]:
    """
    Calculates the cost per query for a given rerank model.

    Input:
        - model: str, the model name without provider prefix

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    model_info = get_model_info(model=model, custom_llm_provider="azure_ai")

    if (
        "input_cost_per_query" not in model_info
        or model_info["input_cost_per_query"] is None
    ):
        return 0.0, 0.0

    prompt_cost = model_info["input_cost_per_query"] * num_queries

    return prompt_cost, 0.0

litellm/llms/azure_ai/embed/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .handler import AzureAIEmbedding
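A usage sketch for the new Azure AI cost helper, assuming the azure_ai/cohere-rerank-v3-english pricing entry added later in this commit (input_cost_per_query = 0.002) is loaded in the model cost map:

from litellm.llms.azure_ai.cost_calculator import cost_per_query

# model is passed without the "azure_ai/" prefix, per the docstring above
prompt_cost, completion_cost = cost_per_query(
    model="cohere-rerank-v3-english", num_queries=3
)
print(prompt_cost)       # 0.006 (3 queries * 0.002 USD/query), assuming that entry
print(completion_cost)   # 0.0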
@@ -8,6 +8,57 @@ from litellm.rerank_api.types import RerankResponse


class AzureAIRerank(CohereRerank):

    def get_base_model(self, azure_model_group: Optional[str]) -> Optional[str]:
        if azure_model_group is None:
            return None
        if azure_model_group == "offer-cohere-rerank-mul-paygo":
            return "azure_ai/cohere-rerank-v3-multilingual"
        if azure_model_group == "offer-cohere-rerank-eng-paygo":
            return "azure_ai/cohere-rerank-v3-english"
        return azure_model_group

    async def async_azure_rerank(
        self,
        model: str,
        api_key: str,
        api_base: str,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        headers: Optional[dict],
        litellm_logging_obj: LiteLLMLoggingObj,
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
    ):
        returned_response: RerankResponse = await super().rerank(  # type: ignore
            model=model,
            api_key=api_key,
            api_base=api_base,
            query=query,
            documents=documents,
            top_n=top_n,
            rank_fields=rank_fields,
            return_documents=return_documents,
            max_chunks_per_doc=max_chunks_per_doc,
            _is_async=True,
            headers=headers,
            litellm_logging_obj=litellm_logging_obj,
        )

        # get base model
        additional_headers = (
            returned_response._hidden_params.get("additional_headers") or {}
        )

        base_model = self.get_base_model(
            additional_headers.get("llm_provider-azureml-model-group")
        )
        returned_response._hidden_params["model"] = base_model

        return returned_response

    def rerank(
        self,
        model: str,
@@ -36,17 +87,39 @@ class AzureAIRerank(CohereRerank):
            if not api_base_url.path.endswith("/v1/rerank"):
                api_base = str(api_base_url.copy_with(path="/v1/rerank"))

        return super().rerank(
            model=model,
            api_key=api_key,
            api_base=api_base,
            query=query,
            documents=documents,
            top_n=top_n,
            rank_fields=rank_fields,
            return_documents=return_documents,
            max_chunks_per_doc=max_chunks_per_doc,
            _is_async=_is_async,
            headers=headers,
            litellm_logging_obj=litellm_logging_obj,
        )
        if _is_async:
            return self.async_azure_rerank(  # type: ignore
                model=model,
                api_key=api_key,
                api_base=api_base,
                query=query,
                documents=documents,
                top_n=top_n,
                rank_fields=rank_fields,
                return_documents=return_documents,
                max_chunks_per_doc=max_chunks_per_doc,
                headers=headers,
                litellm_logging_obj=litellm_logging_obj,
            )
        else:
            returned_response = super().rerank(
                model=model,
                api_key=api_key,
                api_base=api_base,
                query=query,
                documents=documents,
                top_n=top_n,
                rank_fields=rank_fields,
                return_documents=return_documents,
                max_chunks_per_doc=max_chunks_per_doc,
                _is_async=_is_async,
                headers=headers,
                litellm_logging_obj=litellm_logging_obj,
            )

            # get base model
            base_model = self.get_base_model(
                returned_response._hidden_params.get("llm_provider-azureml-model-group")
            )
            returned_response._hidden_params["model"] = base_model
            return returned_response
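A hedged sketch of the base-model mapping this handler performs for cost tracking, assuming AzureAIRerank can be constructed without arguments:

reranker = AzureAIRerank()

# The Azure ML "model group" header is translated to the priced azure_ai/ model name.
assert (
    reranker.get_base_model("offer-cohere-rerank-eng-paygo")
    == "azure_ai/cohere-rerank-v3-english"
)
assert (
    reranker.get_base_model("offer-cohere-rerank-mul-paygo")
    == "azure_ai/cohere-rerank-v3-multilingual"
)
assert reranker.get_base_model(None) is None  # header absent -> no override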
@@ -20,17 +20,9 @@ from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_ca

BEDROCK_CONVERSE_MODELS = [
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "us.anthropic.claude-3-5-sonnet-20240620-v1:0",
    "eu.anthropic.claude-3-5-sonnet-20240620-v1:0",
    "anthropic.claude-3-opus-20240229-v1:0",
    "us.anthropic.claude-3-opus-20240229-v1:0",
    "eu.anthropic.claude-3-opus-20240229-v1:0",
    "anthropic.claude-3-sonnet-20240229-v1:0",
    "us.anthropic.claude-3-sonnet-20240229-v1:0",
    "eu.anthropic.claude-3-sonnet-20240229-v1:0",
    "anthropic.claude-3-haiku-20240307-v1:0",
    "us.anthropic.claude-3-haiku-20240307-v1:0",
    "eu.anthropic.claude-3-haiku-20240307-v1:0",
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
    "anthropic.claude-v1",
@@ -43,6 +35,11 @@ BEDROCK_CONVERSE_MODELS = [
    "meta.llama3-1-405b-instruct-v1:0",
    "meta.llama3-70b-instruct-v1:0",
    "mistral.mistral-large-2407-v1:0",
    "meta.llama3-2-1b-instruct-v1:0",
    "meta.llama3-2-3b-instruct-v1:0",
    "meta.llama3-2-11b-instruct-v1:0",
    "meta.llama3-2-90b-instruct-v1:0",
    "meta.llama3-2-405b-instruct-v1:0",
]

@@ -430,3 +430,22 @@
            setattr(model_response, "trace", completion_response["trace"])

        return model_response

    def _supported_cross_region_inference_region(self) -> List[str]:
        """
        Abbreviations of regions AWS Bedrock supports for cross region inference
        """
        return ["us", "eu"]

    def _get_base_model(self, model: str) -> str:
        """
        Get the base model from the given model name.

        Handle model names like - "us.meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1"
        AND "meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1"
        """

        potential_region = model.split(".", 1)[0]
        if potential_region in self._supported_cross_region_inference_region():
            return model.split(".", 1)[1]
        return model
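A sketch of the new prefix handling, assuming AmazonConverseConfig() takes no required arguments (as in the main.py call site later in this commit):

cfg = AmazonConverseConfig()

# A leading cross-region tag ("us." / "eu.") is stripped before the Converse model check.
assert cfg._get_base_model("us.meta.llama3-2-11b-instruct-v1:0") == "meta.llama3-2-11b-instruct-v1:0"
assert cfg._get_base_model("meta.llama3-2-11b-instruct-v1:0") == "meta.llama3-2-11b-instruct-v1:0"
assert cfg._get_base_model("mistral.mistral-large-2407-v1:0") == "mistral.mistral-large-2407-v1:0"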
litellm/llms/cohere/cost_calculator.py (new file, 31 lines)
@@ -0,0 +1,31 @@
"""
Custom cost calculator for Cohere rerank models
"""

from typing import Tuple

from litellm.utils import get_model_info


def cost_per_query(model: str, num_queries: int = 1) -> Tuple[float, float]:
    """
    Calculates the cost per query for a given rerank model.

    Input:
        - model: str, the model name without provider prefix

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """

    model_info = get_model_info(model=model, custom_llm_provider="cohere")

    if (
        "input_cost_per_query" not in model_info
        or model_info["input_cost_per_query"] is None
    ):
        return 0.0, 0.0

    prompt_cost = model_info["input_cost_per_query"] * num_queries

    return prompt_cost, 0.0
@@ -6,9 +6,6 @@ LiteLLM supports the re rank API format, no paramter transformation occurs

from typing import Any, Dict, List, Optional, Union

import httpx
from pydantic import BaseModel

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base import BaseLLM
@@ -65,7 +62,6 @@ class CohereRerank(BaseLLM):
        )

        request_data_dict = request_data.dict(exclude_none=True)

        ## LOGGING
        litellm_logging_obj.pre_call(
            input=request_data_dict,
@@ -78,7 +74,7 @@ class CohereRerank(BaseLLM):
        )

        if _is_async:
            return self.async_rerank(request_data_dict=request_data_dict, api_key=api_key, api_base=api_base, headers=headers)  # type: ignore # Call async method
            return self.async_rerank(request_data=request_data, api_key=api_key, api_base=api_base, headers=headers)  # type: ignore # Call async method

        client = _get_httpx_client()
        response = client.post(
@@ -87,15 +83,26 @@
            json=request_data_dict,
        )

        return RerankResponse(**response.json())
        returned_response = RerankResponse(**response.json())

        _response_headers = response.headers

        llm_response_headers = {
            "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
        }
        returned_response._hidden_params["additional_headers"] = llm_response_headers

        return returned_response

    async def async_rerank(
        self,
        request_data_dict: Dict[str, Any],
        request_data: RerankRequest,
        api_key: str,
        api_base: str,
        headers: dict,
    ) -> RerankResponse:
        request_data_dict = request_data.dict(exclude_none=True)

        client = get_async_httpx_client(llm_provider=litellm.LlmProviders.COHERE)

        response = await client.post(
@@ -104,4 +111,14 @@
            json=request_data_dict,
        )

        return RerankResponse(**response.json())
        returned_response = RerankResponse(**response.json())

        _response_headers = dict(response.headers)

        llm_response_headers = {
            "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
        }
        returned_response._hidden_params["additional_headers"] = llm_response_headers
        returned_response._hidden_params["model"] = request_data.model

        return returned_response
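The header propagation added above is what the Azure handler relies on: every provider response header is re-keyed with an llm_provider- prefix before being stashed in _hidden_params. A standalone illustration of that dict comprehension:

_response_headers = {"azureml-model-group": "offer-cohere-rerank-eng-paygo"}

llm_response_headers = {
    "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
}
print(llm_response_headers)
# {'llm_provider-azureml-model-group': 'offer-cohere-rerank-eng-paygo'}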
@@ -83,7 +83,8 @@ from .llms import (
from .llms.AI21 import completion as ai21
from .llms.anthropic.chat import AnthropicChatCompletion
from .llms.anthropic.completion import AnthropicTextCompletion
from .llms.azure_ai import AzureAIChatCompletion, AzureAIEmbedding
from .llms.azure_ai.chat import AzureAIChatCompletion
from .llms.azure_ai.embed import AzureAIEmbedding
from .llms.azure_text import AzureTextCompletion
from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
@@ -2411,8 +2412,9 @@ def completion(
                    aws_bedrock_client.meta.region_name
                )

            if model in litellm.BEDROCK_CONVERSE_MODELS:
            base_model = litellm.AmazonConverseConfig()._get_base_model(model)

            if base_model in litellm.BEDROCK_CONVERSE_MODELS:
                response = bedrock_converse_chat_completion.completion(
                    model=model,
                    messages=messages,
@@ -990,6 +990,28 @@
        "mode": "chat",
        "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
    },
    "azure_ai/cohere-rerank-v3-multilingual": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "azure_ai",
        "mode": "rerank"
    },
    "azure_ai/cohere-rerank-v3-english": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "azure_ai",
        "mode": "rerank"
    },
    "azure_ai/Cohere-embed-v3-english": {
        "max_tokens": 512,
        "max_input_tokens": 512,
@@ -3114,6 +3136,50 @@
        "litellm_provider": "cohere",
        "mode": "completion"
    },
    "rerank-english-v3.0": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "cohere",
        "mode": "rerank"
    },
    "rerank-multilingual-v3.0": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "cohere",
        "mode": "rerank"
    },
    "rerank-english-v2.0": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "cohere",
        "mode": "rerank"
    },
    "rerank-multilingual-v2.0": {
        "max_tokens": 4096,
        "max_input_tokens": 4096,
        "max_output_tokens": 4096,
        "max_query_tokens": 2048,
        "input_cost_per_token": 0.0,
        "input_cost_per_query": 0.002,
        "output_cost_per_token": 0.0,
        "litellm_provider": "cohere",
        "mode": "rerank"
    },
    "embed-english-v3.0": {
        "max_tokens": 512,
        "max_input_tokens": 512,
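These entries are billed per query rather than per token. A hedged lookup sketch using the public model-info helper (used later in this diff) against the rerank-english-v3.0 entry above:

import litellm

info = litellm.get_model_info(model="rerank-english-v3.0", custom_llm_provider="cohere")
num_queries = 5
print(info["input_cost_per_query"] * num_queries)  # 0.01 USD for five queries at 0.002 each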
@@ -11,7 +11,11 @@ model_list:
      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
      vertex_project: "adroit-crow-413218"
      vertex_location: "us-central1"

  - model_name: fake-azure-endpoint
    litellm_params:
      model: openai/429
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app
  - model_name: fake-openai-endpoint
    litellm_params:
      model: gpt-3.5-turbo
@@ -23,6 +27,11 @@ model_list:
    litellm_params:
      model: cohere/rerank-english-v3.0
      api_key: os.environ/COHERE_API_KEY
  - model_name: azure-rerank-english-v3.0
    litellm_params:
      model: azure_ai/rerank-english-v3.0
      api_base: os.environ/AZURE_AI_COHERE_API_BASE
      api_key: os.environ/AZURE_AI_COHERE_API_KEY
  - model_name: "databricks/*"
    litellm_params:
      model: "databricks/*"
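A hedged client-side sketch for the azure-rerank-english-v3.0 entry above; the parameter names follow the rerank signature shown in the handler diff, and the env var names are the same placeholders the config references (whether litellm.rerank forwards api_base/api_key exactly like this is an assumption):

import os
import litellm

response = litellm.rerank(
    model="azure_ai/rerank-english-v3.0",
    query="What is the capital of France?",
    documents=["Paris is the capital of France.", "Berlin is the capital of Germany."],
    top_n=1,
    api_base=os.environ["AZURE_AI_COHERE_API_BASE"],
    api_key=os.environ["AZURE_AI_COHERE_API_KEY"],
)
print(response.results)  # highest-scoring document first, with relevance_score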
@@ -43,9 +52,19 @@ model_list:
      model: "vertex_ai/gemini-flash-experimental"

litellm_settings:
  success_callback: ["langfuse", "prometheus"]
  failure_callback: ["prometheus"]
  callbacks: ["prometheus"]
  redact_user_api_key_info: true

  default_team_settings:
    - team_id: "09ae376d-f6c8-42cd-88be-59717135684d" # team 1
      success_callbacks: ["langfuse"]
      langfuse_public_key: "pk-lf-1"
      langfuse_secret: "sk-lf-1"
      langfuse_host: ""

    - team_id: "e5db79db-d623-4a5b-afd5-162be56074df" # team2
      success_callback: ["langfuse"]
      langfuse_public_key: "pk-lf-2"
      langfuse_secret: "sk-lf-2"
      langfuse_host: ""

general_settings:
  proxy_budget_rescheduler_min_time: 1
  proxy_budget_rescheduler_max_time: 1
@@ -1,13 +1,8 @@
from typing import TYPE_CHECKING, Any
from typing import Any

from litellm import verbose_logger

if TYPE_CHECKING:
    from prisma import Prisma

    _db = Prisma
else:
    _db = Any
_db = Any


async def create_missing_views(db: _db):
@@ -505,7 +505,9 @@ prompt_injection_detection_obj: Optional[_OPTIONAL_PromptInjectionDetection] = N
store_model_in_db: bool = False
open_telemetry_logger = None
### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
proxy_logging_obj = ProxyLogging(
    user_api_key_cache=user_api_key_cache, premium_user=premium_user
)
### REDIS QUEUE ###
async_result = None
celery_app_conn = None
@@ -567,7 +569,9 @@ def get_custom_headers(

    try:
        return {
            key: value for key, value in headers.items() if value not in exclude_values
            key: str(value)
            for key, value in headers.items()
            if value not in exclude_values
        }
    except Exception as e:
        verbose_proxy_logger.error(f"Error setting custom headers: {e}")
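The str() cast above is the actual fix for #5920: HTTP response header values must be encodable strings, and numeric values blow up with "'int' object has no attribute 'encode'". A standalone illustration of the transformation; the header names and exclude set here are made up for the example:

headers = {"x-example-spend": 0.0, "x-example-retries": 2, "x-empty": None}
exclude_values = {"", None, "None"}

safe_headers = {
    key: str(value)
    for key, value in headers.items()
    if value not in exclude_values
}
print(safe_headers)  # {'x-example-spend': '0.0', 'x-example-retries': '2'}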
@@ -86,7 +86,7 @@ async def rerank(
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        additional_headers = hidden_params.get("additional_headers", None) or {}
        fastapi_response.headers.update(
            get_custom_headers(
                user_api_key_dict=user_api_key_dict,
@@ -96,6 +96,7 @@ async def rerank(
                version=version,
                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
                request_data=data,
                **additional_headers,
            )
        )

@@ -312,6 +312,7 @@ class ProxyLogging:
    def __init__(
        self,
        user_api_key_cache: DualCache,
        premium_user: bool = False,
    ):
        ## INITIALIZE LITELLM CALLBACKS ##
        self.call_details: dict = {}
@@ -334,6 +335,7 @@ class ProxyLogging:
            alert_types=self.alert_types,
            internal_usage_cache=self.internal_usage_cache.dual_cache,
        )
        self.premium_user = premium_user

    def update_values(
        self,
@@ -394,7 +396,10 @@ class ProxyLogging:
                    callback,
                    internal_usage_cache=self.internal_usage_cache.dual_cache,
                    llm_router=llm_router,
                    premium_user=self.premium_user,
                )
                if callback is None:
                    continue
                if callback not in litellm.input_callback:
                    litellm.input_callback.append(callback)  # type: ignore
                if callback not in litellm.success_callback:
@@ -1226,10 +1226,17 @@ def test_not_found_error():
    )


def test_bedrock_cross_region_inference():
@pytest.mark.parametrize(
    "model",
    [
        # "bedrock/us.anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock/us.meta.llama3-2-11b-instruct-v1:0",
    ],
)
def test_bedrock_cross_region_inference(model):
    litellm.set_verbose = True
    response = completion(
        model="bedrock/us.anthropic.claude-3-haiku-20240307-v1:0",
        model=model,
        messages=messages,
        max_tokens=10,
        temperature=0.1,
@@ -1328,6 +1328,41 @@ def test_completion_cost_vertex_llama3():
    assert cost == 0


@pytest.mark.parametrize(
    "model",
    [
        "cohere/rerank-english-v3.0",
        "azure_ai/cohere-rerank-v3-english",
    ],
)
def test_completion_cost_azure_ai_rerank(model):
    from litellm import RerankResponse, rerank

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    response = RerankResponse(
        id="b01dbf2e-63c8-4981-9e69-32241da559ed",
        results=[
            {
                "document": {
                    "id": "1",
                    "text": "Paris is the capital of France.",
                },
                "index": 0,
                "relevance_score": 0.990732,
            },
        ],
        meta={},
    )
    print("response", response)
    model = model
    cost = completion_cost(
        model=model, completion_response=response, call_type="arerank"
    )
    assert cost > 0


def test_together_ai_embedding_completion_cost():
    from litellm.utils import Choices, EmbeddingResponse, Message, ModelResponse, Usage

@@ -1254,6 +1254,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
        ]
        if turn_off_message_logging:
            assert "redacted-by-litellm" == slobject["messages"][0]["content"]
            assert "redacted-by-litellm" == slobject["response"]


@pytest.mark.skip(reason="Works locally. Flaky on ci/cd")
@@ -23,12 +23,16 @@ litellm.set_verbose = True
import time


@pytest.mark.skip(reason="duplicate test of logging with callbacks")
@pytest.mark.asyncio()
async def test_async_prometheus_success_logging():
    from litellm.integrations.prometheus import PrometheusLogger

    pl = PrometheusLogger()
    run_id = str(uuid.uuid4())

    litellm.set_verbose = True
    litellm.success_callback = ["prometheus"]
    litellm.failure_callback = ["prometheus"]
    litellm.callbacks = [pl]

    response = await litellm.acompletion(
        model="claude-instant-1.2",
@@ -54,12 +58,7 @@ async def test_async_prometheus_success_logging():
    await asyncio.sleep(3)

    # get prometheus logger
    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers

    for callback in _in_memory_loggers:
        if isinstance(callback, PrometheusLogger):
            test_prometheus_logger = callback

    test_prometheus_logger = pl
    print("done with success request")

    print(
@@ -83,12 +82,15 @@ async def test_async_prometheus_success_logging():

@pytest.mark.asyncio()
async def test_async_prometheus_success_logging_with_callbacks():

    pl = PrometheusLogger()

    run_id = str(uuid.uuid4())
    litellm.set_verbose = True

    litellm.success_callback = []
    litellm.failure_callback = []
    litellm.callbacks = ["prometheus"]
    litellm.callbacks = [pl]

    # Get initial metric values
    initial_metrics = {}
@@ -120,11 +122,7 @@ async def test_async_prometheus_success_logging_with_callbacks():
    await asyncio.sleep(3)

    # get prometheus logger
    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers

    for callback in _in_memory_loggers:
        if isinstance(callback, PrometheusLogger):
            test_prometheus_logger = callback
    test_prometheus_logger = pl

    print("done with success request")

@@ -185,6 +185,7 @@ async def test_rerank_custom_api_base():
    }

    mock_response.json = return_val
    mock_response.headers = {"key": "value"}
    mock_response.status_code = 200

    expected_payload = {
@@ -238,6 +239,9 @@ class TestLogger(CustomLogger):

@pytest.mark.asyncio()
async def test_rerank_custom_callbacks():
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    custom_logger = TestLogger()
    litellm.callbacks = [custom_logger]
    response = await litellm.arerank(
@@ -763,6 +763,45 @@ def test_supports_response_schema(model, expected_bool):
    assert expected_bool == response


@pytest.mark.parametrize(
    "model, expected_bool",
    [
        ("gpt-3.5-turbo", True),
        ("gpt-4", True),
        ("command-nightly", False),
        ("gemini-pro", True),
    ],
)
def test_supports_function_calling_v2(model, expected_bool):
    """
    Unit test for 'supports_function_calling' helper function.
    """
    from litellm.utils import supports_function_calling

    response = supports_function_calling(model=model, custom_llm_provider=None)
    assert expected_bool == response


@pytest.mark.parametrize(
    "model, expected_bool",
    [
        ("gpt-4-vision-preview", True),
        ("gpt-3.5-turbo", False),
        ("claude-3-opus-20240229", True),
        ("gemini-pro-vision", True),
        ("command-nightly", False),
    ],
)
def test_supports_vision(model, expected_bool):
    """
    Unit test for 'supports_vision' helper function.
    """
    from litellm.utils import supports_vision

    response = supports_vision(model=model, custom_llm_provider=None)
    assert expected_bool == response


def test_usage_object_null_tokens():
    """
    Unit test.
@@ -59,6 +59,7 @@ class ModelInfo(TypedDict, total=False):
    input_cost_per_character_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    input_cost_per_query: Optional[float]  # only for rerank models
    input_cost_per_image: Optional[float]  # only for vertex ai models
    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
@@ -367,7 +367,7 @@ def function_setup(
                callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
                    callback, internal_usage_cache=None, llm_router=None
                )
                if any(
                if callback is None or any(
                    isinstance(cb, type(callback))
                    for cb in litellm._async_success_callback
                ):  # don't double add a callback
@@ -431,7 +431,7 @@ def function_setup(
                )

                # don't double add a callback
                if not any(
                if callback_class is not None and not any(
                    isinstance(cb, type(callback_class)) for cb in litellm.callbacks
                ):
                    litellm.callbacks.append(callback_class)  # type: ignore
@@ -2148,50 +2148,67 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
    return False


def supports_function_calling(model: str) -> bool:
def supports_function_calling(
    model: str, custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if the given model supports function calling and return a boolean value.

    Parameters:
    model (str): The model name to be checked.
    custom_llm_provider (Optional[str]): The provider to be checked.

    Returns:
    bool: True if the model supports function calling, False otherwise.

    Raises:
    Exception: If the given model is not found in model_prices_and_context_window.json.
    Exception: If the given model is not found or there's an error in retrieval.
    """
    try:
        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
            model=model, custom_llm_provider=custom_llm_provider
        )

        model_info = litellm.get_model_info(
            model=model, custom_llm_provider=custom_llm_provider
        )

    if model in litellm.model_cost:
        model_info = litellm.model_cost[model]
        if model_info.get("supports_function_calling", False) is True:
            return True
        return False
    else:
    except Exception as e:
        raise Exception(
            f"Model not supports function calling. You passed model={model}."
            f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
        )


def supports_vision(model: str):
def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool:
    """
    Check if the given model supports vision and return a boolean value.

    Parameters:
    model (str): The model name to be checked.
    custom_llm_provider (Optional[str]): The provider to be checked.

    Returns:
    bool: True if the model supports vision, False otherwise.

    Raises:
    Exception: If the given model is not found in model_prices_and_context_window.json.
    """
    if model in litellm.model_cost:
        model_info = litellm.model_cost[model]
    try:
        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
            model=model, custom_llm_provider=custom_llm_provider
        )

        model_info = litellm.get_model_info(
            model=model, custom_llm_provider=custom_llm_provider
        )

        if model_info.get("supports_vision", False) is True:
            return True
        return False
    else:
    except Exception as e:
        verbose_logger.error(
            f"Model not found or error in checking vision support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
        )
        return False

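Both helpers now accept an explicit provider and resolve model metadata through get_model_info. A usage sketch mirroring the parametrized tests added in test_utils.py earlier in this commit:

from litellm.utils import supports_function_calling, supports_vision

assert supports_function_calling(model="gpt-3.5-turbo", custom_llm_provider=None) is True
assert supports_function_calling(model="command-nightly", custom_llm_provider=None) is False
assert supports_vision(model="gpt-4-vision-preview", custom_llm_provider=None) is True
assert supports_vision(model="command-nightly", custom_llm_provider=None) is False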
@@ -4755,6 +4772,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
            input_cost_per_character_above_128k_tokens: Optional[
                float
            ]  # only for vertex ai models
            input_cost_per_query: Optional[float]  # only for rerank models
            input_cost_per_image: Optional[float]  # only for vertex ai models
            input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
            input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
@@ -5000,6 +5018,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
            input_cost_per_token_above_128k_tokens=_model_info.get(
                "input_cost_per_token_above_128k_tokens", None
            ),
            input_cost_per_query=_model_info.get("input_cost_per_query", None),
            output_cost_per_token=_output_cost_per_token,
            output_cost_per_character=_model_info.get(
                "output_cost_per_character", None
@@ -125,7 +125,6 @@ async def test_regenerate_api_key(prisma_client):
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    import uuid

    # generate new key
    key_alias = f"test_alias_regenerate_key-{uuid.uuid4()}"