diff --git a/docs/my-website/docs/embedding/supported_embedding.md b/docs/my-website/docs/embedding/supported_embedding.md index aa3c2c4c5..5250ea403 100644 --- a/docs/my-website/docs/embedding/supported_embedding.md +++ b/docs/my-website/docs/embedding/supported_embedding.md @@ -84,6 +84,60 @@ print(query_result[:5]) + +## Image Embeddings + +For models that support image embeddings, you can pass in a base64 encoded image string to the `input` param. + + + + +```python +from litellm import embedding +import os + +# set your api key +os.environ["COHERE_API_KEY"] = "" + +response = embedding(model="cohere/embed-english-v3.0", input=[""]) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: cohere-embed + litellm_params: + model: cohere/embed-english-v3.0 + api_key: os.environ/COHERE_API_KEY +``` + + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \ +-H 'Authorization: Bearer sk-54d77cd67b9febbb' \ +-H 'Content-Type: application/json' \ +-d '{ + "model": "cohere/embed-english-v3.0", + "input": [""] +}' +``` + + + ## Input Params for `litellm.embedding()` diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index bf16a96e6..ee9a9096f 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -814,6 +814,7 @@ general_settings: | pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) | | enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication | | forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). 
| +| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call | ### router_settings - Reference diff --git a/litellm/__init__.py b/litellm/__init__.py index 3282660e9..b1033e7a4 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -8,6 +8,7 @@ import os from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache +from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES from litellm._logging import ( set_verbose, _turn_on_debug, @@ -136,7 +137,7 @@ enable_azure_ad_token_refresh: Optional[bool] = False ### DEFAULT AZURE API VERSION ### AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest ### COHERE EMBEDDINGS DEFAULT TYPE ### -COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document" +COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document" ### GUARDRAILS ### llamaguard_model_name: Optional[str] = None openai_moderations_model_name: Optional[str] = None diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py index fad01f0ff..4b64fb828 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -333,6 +333,14 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915 api_key: Optional[str], dynamic_api_key: Optional[str], ) -> Tuple[str, str, Optional[str], Optional[str]]: + """ + Returns: + Tuple[str, str, Optional[str], Optional[str]]: + model: str + custom_llm_provider: str + dynamic_api_key: Optional[str] + api_base: Optional[str] + """ custom_llm_provider = model.split("/", 1)[0] model = model.split("/", 1)[1] diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index 25cdcc2f3..7deb5490d 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -398,6 +398,8 @@ class AnthropicChatCompletion(BaseLLM): error_response = getattr(e, "response", None) if error_headers is None and error_response: error_headers = getattr(error_response, "headers", None) + if error_response and hasattr(error_response, "text"): + error_text = getattr(error_response, "text", error_text) raise AnthropicError( message=error_text, status_code=status_code, diff --git a/litellm/llms/azure_ai/embed/handler.py b/litellm/llms/azure_ai/embed/handler.py index 682e7e654..638a77479 100644 --- a/litellm/llms/azure_ai/embed/handler.py +++ b/litellm/llms/azure_ai/embed/handler.py @@ -9,7 +9,7 @@ import httpx from openai import OpenAI import litellm -from litellm.llms.cohere.embed import embedding as cohere_embedding +from litellm.llms.cohere.embed.handler import embedding as cohere_embedding from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, diff --git a/litellm/llms/bedrock/embed/cohere_transformation.py b/litellm/llms/bedrock/embed/cohere_transformation.py index 7a1ab75fd..1020aa923 100644 --- a/litellm/llms/bedrock/embed/cohere_transformation.py +++ b/litellm/llms/bedrock/embed/cohere_transformation.py @@ -7,6 +7,7 @@ Why separate file? 
Make it easy to see how transformation works from typing import List import litellm +from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig from litellm.types.llms.bedrock import CohereEmbeddingRequest, CohereEmbeddingResponse from litellm.types.utils import Embedding, EmbeddingResponse @@ -26,15 +27,21 @@ class BedrockCohereEmbeddingConfig: optional_params["embedding_types"] = v return optional_params + def _is_v3_model(self, model: str) -> bool: + return "3" in model + def _transform_request( - self, input: List[str], inference_params: dict + self, model: str, input: List[str], inference_params: dict ) -> CohereEmbeddingRequest: - transformed_request = CohereEmbeddingRequest( - texts=input, - input_type=litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE, # type: ignore + transformed_request = CohereEmbeddingConfig()._transform_request( + model, input, inference_params ) - for k, v in inference_params.items(): - transformed_request[k] = v # type: ignore + new_transformed_request = CohereEmbeddingRequest( + input_type=transformed_request["input_type"], + ) + for k in CohereEmbeddingRequest.__annotations__.keys(): + if k in transformed_request: + new_transformed_request[k] = transformed_request[k] # type: ignore - return transformed_request + return new_transformed_request diff --git a/litellm/llms/bedrock/embed/embedding.py b/litellm/llms/bedrock/embed/embedding.py index 6aefe2040..7a8591a94 100644 --- a/litellm/llms/bedrock/embed/embedding.py +++ b/litellm/llms/bedrock/embed/embedding.py @@ -11,7 +11,7 @@ from typing import Any, Callable, List, Literal, Optional, Tuple, Union import httpx import litellm -from litellm.llms.cohere.embed import embedding as cohere_embedding +from litellm.llms.cohere.embed.handler import embedding as cohere_embedding from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -369,7 +369,7 @@ class BedrockEmbedding(BaseAWSLLM): batch_data: Optional[List] = None if provider == "cohere": data = BedrockCohereEmbeddingConfig()._transform_request( - input=input, inference_params=inference_params + model=model, input=input, inference_params=inference_params ) elif provider == "amazon" and model in [ "amazon.titan-embed-image-v1", diff --git a/litellm/llms/cohere/embed.py b/litellm/llms/cohere/embed/handler.py similarity index 68% rename from litellm/llms/cohere/embed.py rename to litellm/llms/cohere/embed/handler.py index 5d640b506..95cbec225 100644 --- a/litellm/llms/cohere/embed.py +++ b/litellm/llms/cohere/embed/handler.py @@ -12,8 +12,11 @@ import requests # type: ignore import litellm from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.bedrock import CohereEmbeddingRequest from litellm.utils import Choices, Message, ModelResponse, Usage +from .transformation import CohereEmbeddingConfig + def validate_environment(api_key, headers: dict): headers.update( @@ -41,39 +44,9 @@ class CohereError(Exception): ) # Call the base class constructor with the parameters it needs -def _process_embedding_response( - embeddings: list, - model_response: litellm.EmbeddingResponse, - model: str, - encoding: Any, - input: list, -) -> litellm.EmbeddingResponse: - output_data = [] - for idx, embedding in enumerate(embeddings): - output_data.append( - {"object": "embedding", "index": idx, "embedding": embedding} - ) - model_response.object = "list" - model_response.data = output_data - model_response.model = 
model - input_tokens = 0 - for text in input: - input_tokens += len(encoding.encode(text)) - - setattr( - model_response, - "usage", - Usage( - prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens - ), - ) - - return model_response - - async def async_embedding( model: str, - data: dict, + data: Union[dict, CohereEmbeddingRequest], input: list, model_response: litellm.utils.EmbeddingResponse, timeout: Optional[Union[float, httpx.Timeout]], @@ -121,19 +94,12 @@ async def async_embedding( ) raise e - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response.text, - ) - - embeddings = response.json()["embeddings"] - ## PROCESS RESPONSE ## - return _process_embedding_response( - embeddings=embeddings, + return CohereEmbeddingConfig()._transform_response( + response=response, + api_key=api_key, + logging_obj=logging_obj, + data=data, model_response=model_response, model=model, encoding=encoding, @@ -149,7 +115,7 @@ def embedding( optional_params: dict, headers: dict, encoding: Any, - data: Optional[dict] = None, + data: Optional[Union[dict, CohereEmbeddingRequest]] = None, complete_api_base: Optional[str] = None, api_key: Optional[str] = None, aembedding: Optional[bool] = None, @@ -159,11 +125,10 @@ def embedding( headers = validate_environment(api_key, headers=headers) embed_url = complete_api_base or "https://api.cohere.ai/v1/embed" model = model - data = data or {"model": model, "texts": input, **optional_params} - if "3" in model and "input_type" not in data: - # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document" - data["input_type"] = "search_document" + data = data or CohereEmbeddingConfig()._transform_request( + model=model, input=input, inference_params=optional_params + ) ## ROUTING if aembedding is True: @@ -193,30 +158,12 @@ def embedding( client = HTTPHandler(concurrent_limit=1) response = client.post(embed_url, headers=headers, data=json.dumps(data)) - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response, - ) - """ - response - { - 'object': "list", - 'data': [ - - ] - 'model', - 'usage' - } - """ - if response.status_code != 200: - raise CohereError(message=response.text, status_code=response.status_code) - embeddings = response.json()["embeddings"] - return _process_embedding_response( - embeddings=embeddings, + return CohereEmbeddingConfig()._transform_response( + response=response, + api_key=api_key, + logging_obj=logging_obj, + data=data, model_response=model_response, model=model, encoding=encoding, diff --git a/litellm/llms/cohere/embed/transformation.py b/litellm/llms/cohere/embed/transformation.py new file mode 100644 index 000000000..e6bb0f392 --- /dev/null +++ b/litellm/llms/cohere/embed/transformation.py @@ -0,0 +1,160 @@ +""" +Transformation logic from OpenAI /v1/embeddings format to Cohere's /v1/embed format. + +Why separate file? 
Make it easy to see how transformation works
+
+Covers
+- v3 embedding models
+- v2 embedding models
+
+Docs - https://docs.cohere.com/v2/reference/embed
+"""
+
+import types
+from typing import Any, List, Optional, Union
+
+import httpx
+
+from litellm import COHERE_DEFAULT_EMBEDDING_INPUT_TYPE
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.llms.bedrock import (
+    COHERE_EMBEDDING_INPUT_TYPES,
+    CohereEmbeddingRequest,
+    CohereEmbeddingRequestWithModel,
+)
+from litellm.types.utils import (
+    Embedding,
+    EmbeddingResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)
+from litellm.utils import is_base64_encoded
+
+
+class CohereEmbeddingConfig:
+    """
+    Reference: https://docs.cohere.com/v2/reference/embed
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return ["encoding_format"]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "encoding_format":
+                optional_params["embedding_types"] = v
+        return optional_params
+
+    def _is_v3_model(self, model: str) -> bool:
+        return "3" in model
+
+    def _transform_request(
+        self, model: str, input: List[str], inference_params: dict
+    ) -> CohereEmbeddingRequestWithModel:
+        # treat this as an image request if any input string is a b64-encoded image
+        is_encoded = any(is_base64_encoded(input_str) for input_str in input)
+
+        if is_encoded:
+            transformed_request = CohereEmbeddingRequestWithModel(
+                model=model,
+                images=input,
+                input_type="image",
+            )
+        else:
+            transformed_request = CohereEmbeddingRequestWithModel(
+                model=model,
+                texts=input,
+                input_type=COHERE_DEFAULT_EMBEDDING_INPUT_TYPE,
+            )
+
+        for k, v in inference_params.items():
+            transformed_request[k] = v  # type: ignore
+
+        return transformed_request
+
+    def _calculate_usage(self, input: List[str], encoding: Any, meta: dict) -> Usage:
+
+        input_tokens = 0
+
+        text_tokens: Optional[int] = meta.get("billed_units", {}).get("input_tokens")
+
+        image_tokens: Optional[int] = meta.get("billed_units", {}).get("images")
+
+        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        if image_tokens is None and text_tokens is None:
+            for text in input:
+                input_tokens += len(encoding.encode(text))
+        else:
+            prompt_tokens_details = PromptTokensDetailsWrapper(
+                image_tokens=image_tokens,
+                text_tokens=text_tokens,
+            )
+            if image_tokens:
+                input_tokens += image_tokens
+            if text_tokens:
+                input_tokens += text_tokens
+
+        return Usage(
+            prompt_tokens=input_tokens,
+            completion_tokens=0,
+            total_tokens=input_tokens,
+            prompt_tokens_details=prompt_tokens_details,
+        )
+
+    def _transform_response(
+        self,
+        response: httpx.Response,
+        api_key: Optional[str],
+        logging_obj: LiteLLMLoggingObj,
+        data: Union[dict, CohereEmbeddingRequest],
+        model_response: EmbeddingResponse,
+        model: str,
+        encoding: Any,
+        input: list,
+    ) -> EmbeddingResponse:
+
+        response_json = response.json()
+        ## LOGGING
+        logging_obj.post_call(
+            input=input,
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=response_json,
+        )
+        """
+        response
+        {
+            'object': "list",
+            'data': [
+
+            ]
+            'model',
+            'usage'
+        }
+        """
+        embeddings = response_json["embeddings"]
+        output_data = []
+        for idx, embedding in enumerate(embeddings):
+            output_data.append(
+                {"object": "embedding", "index": idx, "embedding": embedding}
+            )
+        model_response.object = "list"
+        model_response.data = output_data
+        model_response.model = 
model + input_tokens = 0 + for text in input: + input_tokens += len(encoding.encode(text)) + + setattr( + model_response, + "usage", + self._calculate_usage(input, encoding, response_json.get("meta", {})), + ) + + return model_response diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index 89b294584..55851a636 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -152,8 +152,10 @@ class AsyncHTTPHandler: setattr(e, "status_code", e.response.status_code) if stream is True: setattr(e, "message", await e.response.aread()) + setattr(e, "text", await e.response.aread()) else: setattr(e, "message", e.response.text) + setattr(e, "text", e.response.text) raise e except Exception as e: raise e diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 15ee85fae..ebfdd41d0 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -2429,6 +2429,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 contents: List[BedrockMessageBlock] = [] msg_i = 0 + ## BASE CASE ## + if len(messages) == 0: + raise litellm.BadRequestError( + message=BAD_MESSAGE_ERROR_STR + + "bedrock requires at least one non-system message", + model=model, + llm_provider=llm_provider, + ) + # if initial message is assistant message if messages[0].get("role") is not None and messages[0]["role"] == "assistant": if user_continue_message is not None: diff --git a/litellm/main.py b/litellm/main.py index f239d2612..f6680f2df 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -113,7 +113,7 @@ from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM from .llms.bedrock.embed.embedding import BedrockEmbedding from .llms.cohere import chat as cohere_chat from .llms.cohere import completion as cohere_completion # type: ignore -from .llms.cohere import embed as cohere_embed +from .llms.cohere.embed import handler as cohere_embed from .llms.custom_llm import CustomLLM, custom_chat_llm_router from .llms.databricks.chat import DatabricksChatCompletion from .llms.groq.chat.handler import GroqChatCompletion diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 890ef8688..fe8834dbb 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3364,54 +3364,56 @@ "litellm_provider": "cohere", "mode": "rerank" }, - "embed-english-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, - "input_cost_per_token": 0.00000010, - "output_cost_per_token": 0.00000, - "litellm_provider": "cohere", - "mode": "embedding" - }, "embed-english-light-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-light-v2.0": { - "max_tokens": 512, - "max_input_tokens": 
512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v2.0": { - "max_tokens": 256, - "max_input_tokens": 256, + "max_tokens": 768, + "max_input_tokens": 768, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, + "embed-english-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "input_cost_per_image": 0.0001, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding", + "supports_image_input": true + }, "replicate/meta/llama-2-13b": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 9ee547652..a34dffccd 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -238,11 +238,15 @@ class LiteLLMProxyRequestSetup: - Adds org id """ data = LitellmDataForBackendLLMCall() - _headers = LiteLLMProxyRequestSetup.add_headers_to_llm_call( - headers, user_api_key_dict - ) - if _headers != {}: - data["headers"] = _headers + if ( + general_settings + and general_settings.get("forward_client_headers_to_llm_api") is True + ): + _headers = LiteLLMProxyRequestSetup.add_headers_to_llm_call( + headers, user_api_key_dict + ) + if _headers != {}: + data["headers"] = _headers _organization = LiteLLMProxyRequestSetup.get_openai_org_id_from_headers( headers, general_settings ) diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py index 4fa0b06bb..737aac3c3 100644 --- a/litellm/types/llms/bedrock.py +++ b/litellm/types/llms/bedrock.py @@ -210,15 +210,23 @@ class ServerSentEvent: return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})" +COHERE_EMBEDDING_INPUT_TYPES = Literal[ + "search_document", "search_query", "classification", "clustering", "image" +] + + class CohereEmbeddingRequest(TypedDict, total=False): - texts: Required[List[str]] - input_type: Required[ - Literal["search_document", "search_query", "classification", "clustering"] - ] + texts: List[str] + images: List[str] + input_type: Required[COHERE_EMBEDDING_INPUT_TYPES] truncate: Literal["NONE", "START", "END"] embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"] +class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest): + model: Required[str] + + class CohereEmbeddingResponse(TypedDict): embeddings: List[List[float]] id: str diff --git a/litellm/utils.py b/litellm/utils.py index dc190bc1a..deb3ae8c6 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5197,7 +5197,9 @@ def create_proxy_transport_and_mounts(): def validate_environment( # noqa: PLR0915 - model: Optional[str] = None, api_key: Optional[str] = None + model: Optional[str] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, ) -> dict: """ Checks if the environment variables are valid for the given model. 
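The second `validate_environment` hunk below prunes `api_base`-style entries from `missing_keys` when a base URL is supplied directly. A minimal sketch of the intended behavior, mirroring the new test in `tests/local_testing/test_utils.py` (assumes no `OLLAMA_*` variables are exported):

```python
from litellm.utils import validate_environment

# Ollama models normally report OLLAMA_API_BASE as missing; passing
# api_base directly now satisfies that requirement, so nothing is left
# in missing_keys and keys_in_environment flips to True.
result = validate_environment(model="ollama/mistral", api_base="https://example.com")
assert result["keys_in_environment"] is True
assert result["missing_keys"] == []
```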
@@ -5224,11 +5226,6 @@ def validate_environment( # noqa: PLR0915 _, custom_llm_provider, _, _ = get_llm_provider(model=model) except Exception: custom_llm_provider = None - # # check if llm provider part of model name - # if model.split("/",1)[0] in litellm.provider_list: - # custom_llm_provider = model.split("/", 1)[0] - # model = model.split("/", 1)[1] - # custom_llm_provider_passed_in = True if custom_llm_provider: if custom_llm_provider == "openai": @@ -5497,6 +5494,17 @@ def validate_environment( # noqa: PLR0915 if "api_key" not in key.lower(): new_missing_keys.append(key) missing_keys = new_missing_keys + + if api_base is not None: + new_missing_keys = [] + for key in missing_keys: + if "api_base" not in key.lower(): + new_missing_keys.append(key) + missing_keys = new_missing_keys + + if len(missing_keys) == 0: # no missing keys + keys_in_environment = True + return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys} diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 890ef8688..fe8834dbb 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3364,54 +3364,56 @@ "litellm_provider": "cohere", "mode": "rerank" }, - "embed-english-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, - "input_cost_per_token": 0.00000010, - "output_cost_per_token": 0.00000, - "litellm_provider": "cohere", - "mode": "embedding" - }, "embed-english-light-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-light-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v2.0": { - "max_tokens": 256, - "max_input_tokens": 256, + "max_tokens": 768, + "max_input_tokens": 768, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, + "embed-english-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "input_cost_per_image": 0.0001, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding", + "supports_image_input": true + }, "replicate/meta/llama-2-13b": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/tests/local_testing/test_embedding.py b/tests/local_testing/test_embedding.py index 4c7560ccc..7993d3280 100644 --- a/tests/local_testing/test_embedding.py +++ b/tests/local_testing/test_embedding.py @@ -1055,3 +1055,28 @@ def test_embedding_response_ratelimit_headers(model): assert int(additional_headers["x-ratelimit-remaining-requests"]) > 0 assert "x-ratelimit-remaining-tokens" in additional_headers assert int(additional_headers["x-ratelimit-remaining-tokens"]) > 0 + + 
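The parametrized test added below drives the new Cohere image-embedding path end-to-end. As a minimal sketch of the call it exercises (assumes `COHERE_API_KEY` is set; `img_b64` is a hypothetical stand-in for the data URL used in the test):

```python
from litellm import embedding

# A base64 data URL is detected via is_base64_encoded() and sent to Cohere
# as `images` with input_type="image"; plain strings still go out as `texts`.
img_b64 = "data:image/jpeg;base64,..."  # hypothetical placeholder, see test below
response = embedding(model="cohere/embed-english-v3.0", input=[img_b64])

# Cohere's billed_units meta is mapped onto usage.prompt_tokens_details.
print(response.usage.prompt_tokens_details.image_tokens)
```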
+@pytest.mark.parametrize( + "input, input_type", + [ + ( + [ + "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD//gAfQ29tcHJlc3NlZCBieSBqcGVnLXJlY29tcHJlc3P/2wCEAAQEBAQEBAQEBAQGBgUGBggHBwcHCAwJCQkJCQwTDA4MDA4MExEUEA8QFBEeFxUVFx4iHRsdIiolJSo0MjRERFwBBAQEBAQEBAQEBAYGBQYGCAcHBwcIDAkJCQkJDBMMDgwMDgwTERQQDxAUER4XFRUXHiIdGx0iKiUlKjQyNEREXP/CABEIAZABkAMBIgACEQEDEQH/xAAdAAEAAQQDAQAAAAAAAAAAAAAABwEFBggCAwQJ/9oACAEBAAAAAN/gAAAAAAAAAAAAAAAAAAAAAAAAAAHTg9j6agAAp23/ADjsAAAPFrlAUYeagAAArdZ12uzcAAKax6jWUAAAAO/bna+oAC1aBxAAAAAAbM7rVABYvnRgYAAAAAbwbIABw+cMYAAAAAAvH1CuwA091RAAAAAAbpbPAGJfMXzAAAAAAJk+hdQGlmsQAAAAABk31JqBx+V1iAAAAAALp9W6gRp826AAAAAAGS/UqoGuGjwAAAAAAl76I1A1K1EAAAAAAG5G1ADUHU0AAAAAAu/1Cu4DVbTgAAAAAA3n2JAIG0IAAAAAArt3toAMV+XfEAAAAAL1uzPlQBT5qR2AAAAAenZDbm/AAa06SgAAAAerYra/LQADp+YmIAAAAC77J7Q5KAACIPnjwAAAAzbZzY24gAAGq+m4AAA7Zo2cmaoAAANWdOOAAAMl2N2TysAAAApEOj2HgAOyYtl5w5jw4zZPJyuGQ5H2AAAdes+suDUAVyfYbZTLajG8HxjgD153n3IAABH8QxxiVo4XPKpGlyTKjowvCbUAF4mD3AAACgqCzYPiPQAA900XAACmN4favRk+a9wB0xdiNAAAvU1cgAxeDcUoPdL0s1B44atQAACSs8AEewD0gM72I5jjDFiAAAPfO1QGL6z9IAlGdRgkaAAABMmRANZsSADls7k6kFW8AAAJIz4DHtW6AAk+d1jhUAAAGdyWBFcGgAX/AGnYZFgAAAM4k4CF4hAA9u3FcKi4AAAEiSEBCsRgAe3biuGxWAAACXsoAiKFgALttgs0J0AAAHpnvkBhOt4AGebE1pBtsAAAGeySA4an2wAGwEjGFxaAAAe+c+wAjKBgAyfZ3kUh3HAAAO6Yb+AKQLGgBctmb2HXDNjAAD1yzkQAENRF1gyvYG9AcI2wjgAByyuSveAAWWMcQtnoyOQs8qAPFhVh8HADt999y65gAAKKgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAf/8QAGgEBAAMBAQEAAAAAAAAAAAAAAAEFBgIEA//aAAgBAhAAAAAAAAAAAAABEAAJkBEAAB0CIAABMhyAAA6EQAAA6EQAABMiIAAAmREAAAmQiAABMgOQAEyAHIATIACIBMu7H3fT419eACEnps7DoPFQch889Wd3V2TeWIBV0o+eF8I0OrXVoAIyvBm8uDe2Wp6ADO+Mw9WDV6rSgAzvjMNWA1Op1AARlvmZbOA3NnpfSAK6iHnwfnFttZ9Wh7AeXPcB5cxWd3Wk7Pvb+uR8q+rgAAAAAAAAP//EABsBAQABBQEAAAAAAAAAAAAAAAAEAQIDBQYH/9oACAEDEAAAAAAAAAC20AL6gCNDxAArnn3gpro4AAv2l4QIgAAJWwGLVAAAX7cQYYAAFdyNZgAAAy7UazAAABsZI18UAAE6YEfWgACRNygavCACsmZkALNZjAMkqVcAC2FFoKyJWe+fMyYoMAAUw2L8t0jYzqhE0dAzd70eHj+PK7mcAa7UDN7VvBwXmDb7EAU5uw9C9KCnh2n6WoAaKIey9ODy/jN+ADRRD2fpQeY8P0QAU5zGel+gg8V53oc4AgaYTfcJ45Tx5I31wCPobQ2PpPRYuP8APMZm2kqoxQddQAAAAAAAAP/EAFMQAAEDAgIDCQkMBwUIAwAAAAECAwQFEQAGBzFREhMhMEBBYXGBCBQYIjJCRlDSFSBSVGJygpGTobHREDRDc6LBwiMzU3CyFiQlNVVkdISSlLP/2gAIAQEAAT8A/wAo74nVaBAb32bNYitfDfcS2PrURiZpU0dwVFMjN1OVY8O8u7//APkFYc076LmfSVSvmQpB/ox4QGjH/r7v/wBGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0Y89fd7IMj2cN6e9GDpCTmRaOuFI9nEDSlo9qakpj5upoJNgH3d4+50JxGlxpbSH4r7bzSvJW0sLSeop5NWsw0fL8RU2rVGPDjJ4C6+4EAnYnaegYzV3StDhFcfK1LdqDuoSZBLDHWlPlqxXtNmkOulaVVxcFg3/sYA73A+kLrxKnTJrpfmSXX3jrcdWVqPWVYudvJ7nbil16s0R7vikVSVDduCVR3lNk9e5IvjKfdG5rpKmo+Yo7NXi8ALlgxJH0kiysZL0l5Uzsz/AMFn2l7m7kJ8BuSj6PnAbU8ieeZitOPPuoQ22krWtZCUpSkXJJOoDGkHui4MBT1MyW2ibITdJnuA97o/dJ1uHFczFXMyzV1Gu1N+bJV57yr7kbEjUkdA5dGlSYb7UqJIcZfaUFtuNLKFoUNRSocIONF3dBb6tih58eSCQEM1PUOqT7eELS4lK0KCkkAgg3BB4/M2Z6NlKlSKtWJiI8VoWueFS1nUhA85ZxpJ0v13Pj7kNorg0NC7tw0K4XNi3yPKPRqHqLQnpkeoD8XKmZZJVSHCG4klw/qijqQs/wCF/pwDfjc1ZqpOUKNLrVXf3qMyLJSLFbrh8ltA51qxn7P9az9V1z6istxWypMSIhRLbCD+Kj5yvUYJHCMdz7pLXWoByfWJBXUILV4bizwvRk+Z0qa4yoTodKgyZ859DEWO0t11xZslCEC5UrGlHSNOz/XVvBa26RFKkQY+xHO4v5a/UtArU3LlZptbpzm4lQ30ut7DbWk9ChwHGXq5EzHQ6ZWoCv8AdpsdDyRrIKtaFdKTwHi+6I0hrffGRKU/ZloodqSkngW5rQz1I1n1P3M2ZzJpFYyvIXdUJ0SowP8AhP8AAtI6AvitIWbWclZVqlbWElxpvcRmz+0kOcDaf5nEyXJnypM2Y8p2Q+6t11xRupa1m6lHpJ9T6B6uaVpHo7alEMz0PQnepxN0/wASRgauJ7pTNZmVynZTjuXZpzYkSRtkPDgB6UI9UZMlrgZsy1MQqxZqkRy/QHRfA4i
ZIaiRX5D6ghpptTi1bEIFycZmrL2YcwVitvk7ubLdfsfNClcCewcHqiiX91qbbX3yz/rGBxGmKse4ujnMz6F2dfjiGj/2VBs/ccE3J9UZOirm5ry3EQm5eqkRu3Qp0YHEd01PLGUqPT0mxk1QLV0oZaPteqdBtKNV0kUIkXah77Md6mkcH8RGBq4jupH7JyXG/wDPcP1tj1T3MuWVMQK5mt9FjJWmDGO1tHjuHqJ4nupEnvrJa+beZ4/jR6ooNGnZhrFOotNa3yXMeS02OvWo9CRwk4ytQIeWKDS6HC/V4TCWgq1itWtSz0rPCeJ7qKNenZSl2/upEtonpcShXqcC+NA+jFeW4H+1NbYKatOaswysWMaOrbscc4rujaYZuj/vzccMCpR3yehwFn+r1MAVGwGNDOhVbK4ubc4xLLFnYMB1PCNjrw/BHF58opzDk7MlHSndOSID28ja6gbtH3jChZRHqShZerOZag1S6JT3pcpzUhsahtUTwJTtJxow0G0vKRYreYS1PrIAUhNrx4yvkA+WsfCONXFnGlTLZytnqvU5KLRlvmTG2Fl/xwB0J1eookOXPkNRYUZ1991W5baaQVrWdiUi5JxkbudKzVCzOzg+abE196NWXKWOnWlvGW8p0DKMEU6g01qKzwFe5F1uEDynFnhUeO7pTJ5n0aBmyK3d+mneJVtZjOnxVfQX6ghwZtRktQ4EV6RJcNkNMoK1qOwJTcnGTe5yr9V3qXmuSKXFNj3uizkpY/0oxlbIOVslRt6oVKaZdIst9XjyHPnOK4ezkFVgw6vAmU2ewHYsllbDiFaloWNyoYz1lKZknMtRoEu6gyvdMO8zrC/IXy2j0Cs5glpg0WmyJkk+YwgrIG1WwdJxk7uap75amZyqQit6zChkLe6lueSnGWcl5ayjGEegUliKCAFuAbp5z57irqPI9NOjVOdqB31T2x7tU5KlxNryNa2CenWnDra2XFtOoUhaFFKkqFiCOAgg8qyro7zdnJwCh0Z5xi9lSVje46etarA22DGUe5spEPe5ebqgue78Ui3aj9Sl+WvFIodHoMREGj02PDjJ1NMNhAJ2m2s8m07aIHJi5WdMsxSZFiuoxG08LoGt9sDz/hjGrkzLD0hxDLDSluLISlKQSpRPMAMZU0C54zFvcidHTR4Sv2k24dI+SyPG+u2MqaBskZc3qRLimrzEftZoBaB+S0PFw0y2y2hppCUIQAEpSAAAOYAauU6XtBJmuycy5LjASVXcl05sWDu1bGxe1GHWnGXFtOoUhxCilSVAghSTYgg6iOR5eyfmXNT/AHvQKNJmKBspTaLNo+es2SntOMq9zNIc3uTm+sBoazEgWWvtdWLDGWchZTyk2E0KiR4zlrKkEbt9XW4u6uW6SNDNAzwHZ7BTTq3YkSm0XS7sS+ka/na8ZuyJmbJMwxK9T1NJJs1IR47D3S2vj2mXXlobabUtaiAlKRcknUAMZV0F56zJvT8iEKVCVY77PuhZHyWvLxlTuesl0Te3qqlysy08JMnxI4PQ0n+onEWDFhMNxokdphhsWQ20gIQkbEpFgPeyqnBg/rMhCCBfc3ur6hw4lZ1hNbpMdlbpGokhKT+OHs7zVf3EdpHzgVfzGDnGqnnbHUkYGcqqOZo/OT+VsMZ5eBG/w0K2lJKPaxDzfTJBCXFLZUTbxk3+q2GJTEhAcYdQtB1KSoEckqdLp1ThvQqnEZkxXU7lbLyAtCusKxnPubKVNU9NyhOMB03Pekm7kfsXwqRjM+jfOWUVLNZochEcapLY31gj56LgduLHZxNjjL+TM0ZpcDdCokuWL2LiEWaSflOKskYyt3M8t0tSM31hLCNZiwbLc7XVCwxljR9lHKDaRQ6Kww6BZUlQ32Qr6a7nAAHvFLSkEqUAAMT81UyGClDm/r2N6u1WKhm2oywpDKt4bPMjX/8ALC3HHCVLWSSbm+338adLhuB2O+tChzg4pOdOFDVRRbm31A/EflhiQ1IbS6y4laFaik3HJCkKBBAII4RjMOibIOYCtc/LkZD6tb0W8Zy+0luwVisdzDRX925RMyS4uxMtlD46gUFGKj3NWdY11wajSpbf71bS/qUnErQTpPjXIy2Xk7WZLCv68L0R6R2/KylO+ikK/A4Tom0jL1ZRqHa3bEXQjpPlkBGVXkDa48yj8V4p/c358lEGW/TIaOcOSCtfYG0qxSO5gp6AldczQ+9tbhsBr+NwqxRNDWjygFDjGXmpL4N99nEyVH6K/FGGmGY7SGm20oQgAJSkAJAHMAPeyJ8WEjfJD6EX1XP4DWTioZ1ZRdEBndnmWvgT2DE6tVCoE98SFFPMgGyR2DBN+E8XSq3MpToUyu7ZIK0HUcUmsRapGK46wlfBuknWnk5AOsY3I2YsNmLAagPf1HMFNp+6S68FOD9mjhV+QxUM5THrohJDKNutWHpL8halvOqWo6yokk8fT58inSESI6ylST2EbDtGKRU49VitvtkJI8tOsg7OOJA1nFSzhQKaVIkT21OA23DV3Fdu51Yk6VICCREpzznS4pKPw3WDpXk34KOgD9+fZwxpWB4JNIIG1D1/xTinaSMvylJDy3YyjwDfUXH1pviFPhTGw/FkNuoOpbagofdxU2fHhMqekOBDadus4q+bJcwqahkssfxnrOFKKjckk8iodWcpUxDySS2rgcTfWMMPtvstvNKCkLSFJI5weMzFm6mZfQUvL32UQCiOg+N1q2DFbzlWa2paXHyzGOplolKbfKOtWLnb72FUp9NeD8GU4y4OdBtfr2jGW9JTbqm4tdQlCr2D6fIPzxzYadbdQhxpYUlQBBBuCD7+pVKPTIq5D6uAcCUjWpWwYqtWlVV9Tr6yE6kIHkpHJcl1cqS5TXjfc+O3f7xxedc6IoqTAgEKnqHCdYZB5ztVsGH5D0p5x+Q6px1ZKlKUbknico5zk0J5EWWtTtPWeFOstdKejaMR5TMxhuQw4lbTiQpKkm4UD7151thtbriwlCElSidQAxXaw7VZalXsyglLadg/M8mpstcKbHko1oWDbb0duGXEOtIcQbpUkKB2g8Tm3MSMv0xbySDJduhhB+FtPQMSJD0p5yRIcK3XFFSlK1kni9HealU+UijzFjvZ5X9iVHyHDzdSve5yqqm2kU5pViuynCNnMOUZVld80lgKsVNEtns4QPqPEKNgTjOdbVWq0+tC7xmCWmRzWTrV2njEqUhQUkkEG4Ixk6ue7dFjPuuXeau08Plp5+0cP6VrS22pSiAACSdgGKpMXPnSJK/PWSBsHMOzlGRX/EmsW8koWOs3B4jONTNNoNQkIUUr3ve27awpzxb4PCTxujGpKYqkinKV4klvdJ+e3+nMkjvakS1DWtIb7FcB+7BNyTyjI67S5CDzsqP1EcRpUkqRTqfFBtvr6l9iE2/nx2V5XeeYKS9/3CEdizuD+OEm4/RnVak0+OhJtd256gm38+U5JTeY+rYyofeniNKyjv8AR0c24f8AxTx1NJTUYKhrD7Z/iGEeSP0Z63Pe8Xc6hur9dxynI7JtNeOqyAO0m/EaVv1mj/
Mf/FPHU7/mEL98j8cI8gfozq2pdOZWnmdseopJ5TlKIWKShZFi8tSz2eL/AC4jSsx/Y0qR8FbqD9IA8dQmFSK1S2UjypTQ7N0L4SLJ/RmOOJVIloSk+Ijdjb4nCcEWJB5PDjrlSWWGxdS1hI7TiHHRGjsso8htCUDqSLcRpDppl5ckLABXHUl8DYBwH7jx2juAZeYmXyk7iM2t07L23I/HA/QtIWkpULggjFXgqp8+RHINkrO5O0axyfJlLK3l1F1Pit3S3cecRr7BxMqM3IjusOpCkOoKVjakixGKzTXaTU5cB4HdNOEAnzk6we0cbo3o5g0hU91FnZhCh+7T5PvM6UjfWkTmE3W0LObSnmPZyanQHqjKajMjhUeE2uANpxAhNQYzTDabNtpsOk85PXxWkjLJmRk1mGjdPR0WdA85rb9HjMqUByv1Rtgg97N2W+vYjZ1qww02y2htCQlCEhKUjUAPeLQlxCkLAUlQsQdRBxmKiOUqWopSox1m6FHht0HkjDDsl1DLKCpajYAYoFFRSYw3dlSF8K1bPkji1JCgUkXBxnjJTlJecqVOZvCWbrQn9kT/AEniqVSplYmNQoTRW4s9iRzqUeYDGXaBFoFPbiMC6/KdctYrVt/Ie+qECNMjKjyE7oLHaOkYrVEkUl8hQKmVE7hY1HkUOFInPoYjtla1bMUDLzNKb3xyy5KvKXzDoTxrjaHEKQ4gKSoWIIuCDzYzTo5WlTk2ggEG6lxr6vmH+WHmXWHFtPNqQ4k2UlQIIOwg+/y/lCq19xKm2yzFv4z7g8X6I844oOXoFBiiPDb4TYuOny1kbTxEmOxKaVHebS4hXlA4rWTpEdSnqfdxu5JR5w6tuFtONKKXEFJBsQeOShSzZIvilZTnTShySCwyfhDxj1DFPpcSmtBuM0B8JR4VK6zyCr5apFaQROiJWsCwdT4qx1KGKloseG7XSp4UnmQ+LfxJxJyLmaMoj3OU4n4TakqwrLVfSbGjy/sV4ZyhmN/yKRI+kncf6rYhaM64+QZa2YyOk7tQ7E4o+jyiU0h2SgzHhzu+R2I/PCEIbASgAJAsAOLqFFp84HvphKlkCyhwK4OnZiXkcElUKV9Fz2hh/KdZataPuwfOSoEYXQqog2MJ49Taj/LHuNVPiEj7Jf5Y9xqp8QkfZL/LHuNVPiEj7Jf5Y9xqp8QkfZL/ACx7jVT4hI+yX+WPcaqfEJH2S/yx7jVT4hI+yX+WEUCquaoTw+chQ/EYYyjWHQSpgN9K1C33XOIuR0+VMlfRbH8ziFRKdTwksRkhY89XjK+/VyWwxYf5ef/EADgRAAIBAgMDCQUHBQAAAAAAAAECAwQRAAUgMUFhEhMhIjBAUXGREDJQU6EGFDNCYoGSUnKiwdH/2gAIAQIBAT8A+L37e/wE9zHfj3k90Gk90Gk9ztqPcbd3t3e3b2129qRySGyIScRZY56ZXtwGFoKZfyX8zj7rT/JX0w+X0zbFKngcTZdLHdozyx9cbOg9pbFtENJPNYqlh4nEOWxJYykufQYVFQWRQBw1VVGk4LKAJPHxwysjFWFiNUsscKGSVwqjecVOfgErSxX/AFNhs5r2P4oHkoxHndchHKZXHFf+YpM7gnISYc0/+J0KpYhVFycUtCkQDygM/huHZZjThl59R1l97iNMsqQxvLIbKoucV1dLWykkkRg9VdOUZmyOtLO10PQhO4+Hty6mCrz7jpPu+XZsoZSp2EEYkQxyOh/KSNGf1JAipVO3rNq2EHGW1P3mkikJ6w6reYxGpd0QbyBhVCqFGwC3aV4tUycbHRnLFq+UeAUfTX9nmJhqE3BwfUYoxeqi8+1ryDVPwA0ZwCMwm4hT9Nf2eB5qobcWUfTFM3Inib9Q7QkAEnYMSvzkrv4knRn8BEkVQB0Ecg+Y15RTmCij5Qsz9c/v7KWYTQo28dDefZ5hUBI+aU9Z9vAaamnSqheF9jD0OKmmlpZWilFiNh3Eacqy9quUSSLaFDc8T4YAt7KWpNPJfap94YR1kUOhuD2NTVJTr4vuGHdpHZ3NydVVSQVaciZfIjaMVOR1URJhtKvocNSVSmzU8gP9pxHQVkhASnf9xbFJkJuHq2Fv6F/2cIiRoqIoVQLADRBUSwG6Ho3g7DiLMYX6Huh9RgTwtslT1GOdi+YnqMc7F8xP5DHOxfMT+Qxz0XzE9Rh6ymTbKD5dOJsyY3WFbcThmZiWYkk7z8W//8QAOREAAgECAgYHBwMDBQAAAAAAAQIDAAQFERITICExkQYwQVFSYXEQFCJAQlOBMlChI4KSYnJzsbL/2gAIAQMBAT8A/YCyjiwFa2PxjnWtj8Y51rY/GOda2PxjnWtj8Y51rY/GOda2PxjnWtj8Y51rY/GOda2PxjnWtj8YoMp4EHq5LlV3LvNPNI/FuXW5kcDUdw6cd4pJFkGanbJABJqacvmq7l+RR2Rgy0jiRQw2rmXM6CncOPydq+T6B4HZmfQjJ7eA+UQ6LqfMbN229V/Pyg4j1GzcnOVvlIV0pFH52bgZSt8pbRaC6TcTs3YycHvHyQBJAFQ2+WTyfgbVymlHmOI+Rjt3fe3wio4kj4Df39RNGY38jw60AscgMzSWrHe5yFJEkfBd/f1UiLIpU1JG0ZyPVJE7/pWktRxc/gUqKgyVQOtZVcZMMxUlqw3pvHdRBU5EEbIBO4CktpG3t8IpLeNOzM+fsSN5DkikmosPY75Wy8hS2duv0Z+te7wfaXlT2Nu3BSvoalsJE3xnTH81vG49UVVtzAGjbRH6cq90TxGvdE8RoW0Q7M6Cqu5VA9kVrNLvC5DvNRWEa75CWPIUqqgyVQB5bVzarMCy7n7++mUoxVhkRtW9tPdypBbRNJI3BVFYf0FdlWTErnQP24uP5JqLojgUYyNqznvZ2q46GYLKDq0khPejk/8ArOsU6HX1irTWre8xDeQBk4/FHduPtALEKozJq3skjAaQaT/wOqv4NJdco3jj6bNtby3c8VtAulJIwVRWCYJb4PbKqqGnYDWSdpPcPLZ6V9HEmikxOxjAlQaUqL9Q7x5+2xgCrrmG8/p9OrIDAg8CKkTQd07iRsdBcPV3ucSkX9H9KP1O8naIBBBG410gsBh2K3MCDKNjrE/2tSLpuqDtIFKAqhRwA6y9GVw/mAdjohEEwK2I4u0jH/Lb6exgXljL2tEwP9pq0GdzF69bfHO4fyAGx0ScPgVpl9JkB/yO309cG6w9O0ROeZq3bQnib/UOsJyBJqV9ZI7952Ogl8DDdYezfEra1B5HcdvpTfC+xicoc44QIl/t4/z7LaUTRK3bwPr1d9PoJqlPxN/A2cOvpsNvIbyA/Eh3jvHaDWHYjbYnapdWzgg/qHap7js9JseTDLZreBwbuVSAB9AP1GiSSSeJ9ltcGB8/pPEUjq6hlOYPU3FykC97dgp3aRi7HMnaw3FbzCptdaSZeJDvVh5isO6aYdcqq3gNvJ25705ikxXDJAGS/gI/5FqfHMIt10pb+H0DBjyGdYr03XRaLCojnw1sg/6FTTSzyPNNIXkc5szHMnYhuJIDmh3do
PCo7+F9z5oaE0R4SrzrWR/cXnWsj+4vOtZH9xeYrWx/cXmKe6gTjID6b6lxAnMQrl5mmYsSzEkn92//2Q==" + ], + "image", + ), + (["hello world"], "text"), + ], +) +def test_cohere_img_embeddings(input, input_type): + litellm.set_verbose = True + response = embedding( + model="cohere/embed-english-v3.0", + input=input, + ) + + if input_type == "image": + assert response.usage.prompt_tokens_details.image_tokens > 0 + else: + assert response.usage.prompt_tokens_details.text_tokens > 0 diff --git a/tests/local_testing/test_get_llm_provider.py b/tests/local_testing/test_get_llm_provider.py index e72373805..f7126cec0 100644 --- a/tests/local_testing/test_get_llm_provider.py +++ b/tests/local_testing/test_get_llm_provider.py @@ -160,3 +160,12 @@ def test_get_llm_provider_jina_ai(): assert custom_llm_provider == "openai_like" assert api_base == "https://api.jina.ai/v1" assert model == "jina-embeddings-v3" + + +def test_get_llm_provider_hosted_vllm(): + model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider( + model="hosted_vllm/llama-3.1-70b-instruct", + ) + assert custom_llm_provider == "hosted_vllm" + assert model == "llama-3.1-70b-instruct" + assert dynamic_api_key == "" diff --git a/tests/local_testing/test_prompt_factory.py b/tests/local_testing/test_prompt_factory.py index 74e7cefa5..7b4e295ce 100644 --- a/tests/local_testing/test_prompt_factory.py +++ b/tests/local_testing/test_prompt_factory.py @@ -675,3 +675,15 @@ def test_alternating_roles_e2e(): "stream": False, } ) + + +def test_just_system_message(): + from litellm.llms.prompt_templates.factory import _bedrock_converse_messages_pt + + with pytest.raises(litellm.BadRequestError) as e: + _bedrock_converse_messages_pt( + messages=[], + model="anthropic.claude-3-sonnet-20240229-v1:0", + llm_provider="bedrock", + ) + assert "bedrock requires at least one non-system message" in str(e.value) diff --git a/tests/local_testing/test_proxy_server.py b/tests/local_testing/test_proxy_server.py index 803243557..51ec085ba 100644 --- a/tests/local_testing/test_proxy_server.py +++ b/tests/local_testing/test_proxy_server.py @@ -225,12 +225,20 @@ def test_add_headers_to_request(litellm_key_header_name): "litellm_key_header_name", ["x-litellm-key", None], ) +@pytest.mark.parametrize( + "forward_headers", + [True, False], +) @mock_patch_acompletion() def test_chat_completion_forward_headers( - mock_acompletion, client_no_auth, litellm_key_header_name + mock_acompletion, client_no_auth, litellm_key_header_name, forward_headers ): global headers try: + if forward_headers: + gs = getattr(litellm.proxy.proxy_server, "general_settings") + gs["forward_client_headers_to_llm_api"] = True + setattr(litellm.proxy.proxy_server, "general_settings", gs) if litellm_key_header_name is not None: gs = getattr(litellm.proxy.proxy_server, "general_settings") gs["litellm_key_header_name"] = litellm_key_header_name @@ -260,23 +268,14 @@ def test_chat_completion_forward_headers( response = client_no_auth.post( "/v1/chat/completions", json=test_data, headers=received_headers ) - mock_acompletion.assert_called_once_with( - model="gpt-3.5-turbo", - messages=[ - {"role": "user", "content": "hi"}, - ], - max_tokens=10, - litellm_call_id=mock.ANY, - litellm_logging_obj=mock.ANY, - request_timeout=mock.ANY, - specific_deployment=True, - metadata=mock.ANY, - proxy_server_request=mock.ANY, - headers={ + if not forward_headers: + assert "headers" not in mock_acompletion.call_args.kwargs + else: + assert mock_acompletion.call_args.kwargs["headers"] == { "x-custom-header": 
"Custom-Value", "x-another-header": "Another-Value", - }, - ) + } + print(f"response - {response.text}") assert response.status_code == 200 result = response.json() diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py index 9c26da614..3558f88bc 100644 --- a/tests/local_testing/test_utils.py +++ b/tests/local_testing/test_utils.py @@ -331,6 +331,13 @@ def test_validate_environment_api_key(): ), f"Missing keys={response_obj['missing_keys']}" +def test_validate_environment_api_base_dynamic(): + for provider in ["ollama", "ollama_chat"]: + kv = validate_environment(provider + "/mistral", api_base="https://example.com") + assert kv["keys_in_environment"] + assert kv["missing_keys"] == [] + + @mock.patch.dict(os.environ, {"OLLAMA_API_BASE": "foo"}, clear=True) def test_validate_environment_ollama(): for provider in ["ollama", "ollama_chat"]: