diff --git a/docs/my-website/docs/embedding/supported_embedding.md b/docs/my-website/docs/embedding/supported_embedding.md index aa3c2c4c5..5250ea403 100644 --- a/docs/my-website/docs/embedding/supported_embedding.md +++ b/docs/my-website/docs/embedding/supported_embedding.md @@ -84,6 +84,60 @@ print(query_result[:5]) + +## Image Embeddings + +For models that support image embeddings, you can pass in a base64 encoded image string to the `input` param. + + + + +```python +from litellm import embedding +import os + +# set your api key +os.environ["COHERE_API_KEY"] = "" + +response = embedding(model="cohere/embed-english-v3.0", input=[""]) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: cohere-embed + litellm_params: + model: cohere/embed-english-v3.0 + api_key: os.environ/COHERE_API_KEY +``` + + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \ +-H 'Authorization: Bearer sk-54d77cd67b9febbb' \ +-H 'Content-Type: application/json' \ +-d '{ + "model": "cohere/embed-english-v3.0", + "input": [""] +}' +``` + + + ## Input Params for `litellm.embedding()` diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index bf16a96e6..ee9a9096f 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -814,6 +814,7 @@ general_settings: | pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) | | enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication | | forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). 
| +| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call | ### router_settings - Reference diff --git a/litellm/__init__.py b/litellm/__init__.py index 3282660e9..b1033e7a4 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -8,6 +8,7 @@ import os from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache +from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES from litellm._logging import ( set_verbose, _turn_on_debug, @@ -136,7 +137,7 @@ enable_azure_ad_token_refresh: Optional[bool] = False ### DEFAULT AZURE API VERSION ### AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest ### COHERE EMBEDDINGS DEFAULT TYPE ### -COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document" +COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document" ### GUARDRAILS ### llamaguard_model_name: Optional[str] = None openai_moderations_model_name: Optional[str] = None diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py index fad01f0ff..4b64fb828 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -333,6 +333,14 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915 api_key: Optional[str], dynamic_api_key: Optional[str], ) -> Tuple[str, str, Optional[str], Optional[str]]: + """ + Returns: + Tuple[str, str, Optional[str], Optional[str]]: + model: str + custom_llm_provider: str + dynamic_api_key: Optional[str] + api_base: Optional[str] + """ custom_llm_provider = model.split("/", 1)[0] model = model.split("/", 1)[1] diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index 25cdcc2f3..7deb5490d 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -398,6 +398,8 @@ class AnthropicChatCompletion(BaseLLM): error_response = getattr(e, "response", None) if error_headers is None and error_response: error_headers = getattr(error_response, "headers", None) + if error_response and hasattr(error_response, "text"): + error_text = getattr(error_response, "text", error_text) raise AnthropicError( message=error_text, status_code=status_code, diff --git a/litellm/llms/azure_ai/embed/handler.py b/litellm/llms/azure_ai/embed/handler.py index 682e7e654..638a77479 100644 --- a/litellm/llms/azure_ai/embed/handler.py +++ b/litellm/llms/azure_ai/embed/handler.py @@ -9,7 +9,7 @@ import httpx from openai import OpenAI import litellm -from litellm.llms.cohere.embed import embedding as cohere_embedding +from litellm.llms.cohere.embed.handler import embedding as cohere_embedding from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, diff --git a/litellm/llms/bedrock/embed/cohere_transformation.py b/litellm/llms/bedrock/embed/cohere_transformation.py index 7a1ab75fd..1020aa923 100644 --- a/litellm/llms/bedrock/embed/cohere_transformation.py +++ b/litellm/llms/bedrock/embed/cohere_transformation.py @@ -7,6 +7,7 @@ Why separate file? 
Make it easy to see how transformation works from typing import List import litellm +from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig from litellm.types.llms.bedrock import CohereEmbeddingRequest, CohereEmbeddingResponse from litellm.types.utils import Embedding, EmbeddingResponse @@ -26,15 +27,21 @@ class BedrockCohereEmbeddingConfig: optional_params["embedding_types"] = v return optional_params + def _is_v3_model(self, model: str) -> bool: + return "3" in model + def _transform_request( - self, input: List[str], inference_params: dict + self, model: str, input: List[str], inference_params: dict ) -> CohereEmbeddingRequest: - transformed_request = CohereEmbeddingRequest( - texts=input, - input_type=litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE, # type: ignore + transformed_request = CohereEmbeddingConfig()._transform_request( + model, input, inference_params ) - for k, v in inference_params.items(): - transformed_request[k] = v # type: ignore + new_transformed_request = CohereEmbeddingRequest( + input_type=transformed_request["input_type"], + ) + for k in CohereEmbeddingRequest.__annotations__.keys(): + if k in transformed_request: + new_transformed_request[k] = transformed_request[k] # type: ignore - return transformed_request + return new_transformed_request diff --git a/litellm/llms/bedrock/embed/embedding.py b/litellm/llms/bedrock/embed/embedding.py index 6aefe2040..7a8591a94 100644 --- a/litellm/llms/bedrock/embed/embedding.py +++ b/litellm/llms/bedrock/embed/embedding.py @@ -11,7 +11,7 @@ from typing import Any, Callable, List, Literal, Optional, Tuple, Union import httpx import litellm -from litellm.llms.cohere.embed import embedding as cohere_embedding +from litellm.llms.cohere.embed.handler import embedding as cohere_embedding from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -369,7 +369,7 @@ class BedrockEmbedding(BaseAWSLLM): batch_data: Optional[List] = None if provider == "cohere": data = BedrockCohereEmbeddingConfig()._transform_request( - input=input, inference_params=inference_params + model=model, input=input, inference_params=inference_params ) elif provider == "amazon" and model in [ "amazon.titan-embed-image-v1", diff --git a/litellm/llms/cohere/embed.py b/litellm/llms/cohere/embed/handler.py similarity index 68% rename from litellm/llms/cohere/embed.py rename to litellm/llms/cohere/embed/handler.py index 5d640b506..95cbec225 100644 --- a/litellm/llms/cohere/embed.py +++ b/litellm/llms/cohere/embed/handler.py @@ -12,8 +12,11 @@ import requests # type: ignore import litellm from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.bedrock import CohereEmbeddingRequest from litellm.utils import Choices, Message, ModelResponse, Usage +from .transformation import CohereEmbeddingConfig + def validate_environment(api_key, headers: dict): headers.update( @@ -41,39 +44,9 @@ class CohereError(Exception): ) # Call the base class constructor with the parameters it needs -def _process_embedding_response( - embeddings: list, - model_response: litellm.EmbeddingResponse, - model: str, - encoding: Any, - input: list, -) -> litellm.EmbeddingResponse: - output_data = [] - for idx, embedding in enumerate(embeddings): - output_data.append( - {"object": "embedding", "index": idx, "embedding": embedding} - ) - model_response.object = "list" - model_response.data = output_data - model_response.model = 
model - input_tokens = 0 - for text in input: - input_tokens += len(encoding.encode(text)) - - setattr( - model_response, - "usage", - Usage( - prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens - ), - ) - - return model_response - - async def async_embedding( model: str, - data: dict, + data: Union[dict, CohereEmbeddingRequest], input: list, model_response: litellm.utils.EmbeddingResponse, timeout: Optional[Union[float, httpx.Timeout]], @@ -121,19 +94,12 @@ async def async_embedding( ) raise e - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response.text, - ) - - embeddings = response.json()["embeddings"] - ## PROCESS RESPONSE ## - return _process_embedding_response( - embeddings=embeddings, + return CohereEmbeddingConfig()._transform_response( + response=response, + api_key=api_key, + logging_obj=logging_obj, + data=data, model_response=model_response, model=model, encoding=encoding, @@ -149,7 +115,7 @@ def embedding( optional_params: dict, headers: dict, encoding: Any, - data: Optional[dict] = None, + data: Optional[Union[dict, CohereEmbeddingRequest]] = None, complete_api_base: Optional[str] = None, api_key: Optional[str] = None, aembedding: Optional[bool] = None, @@ -159,11 +125,10 @@ def embedding( headers = validate_environment(api_key, headers=headers) embed_url = complete_api_base or "https://api.cohere.ai/v1/embed" model = model - data = data or {"model": model, "texts": input, **optional_params} - if "3" in model and "input_type" not in data: - # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document" - data["input_type"] = "search_document" + data = data or CohereEmbeddingConfig()._transform_request( + model=model, input=input, inference_params=optional_params + ) ## ROUTING if aembedding is True: @@ -193,30 +158,12 @@ def embedding( client = HTTPHandler(concurrent_limit=1) response = client.post(embed_url, headers=headers, data=json.dumps(data)) - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=response, - ) - """ - response - { - 'object': "list", - 'data': [ - - ] - 'model', - 'usage' - } - """ - if response.status_code != 200: - raise CohereError(message=response.text, status_code=response.status_code) - embeddings = response.json()["embeddings"] - return _process_embedding_response( - embeddings=embeddings, + return CohereEmbeddingConfig()._transform_response( + response=response, + api_key=api_key, + logging_obj=logging_obj, + data=data, model_response=model_response, model=model, encoding=encoding, diff --git a/litellm/llms/cohere/embed/transformation.py b/litellm/llms/cohere/embed/transformation.py new file mode 100644 index 000000000..e6bb0f392 --- /dev/null +++ b/litellm/llms/cohere/embed/transformation.py @@ -0,0 +1,160 @@ +""" +Transformation logic from OpenAI /v1/embeddings format to Cohere's /v1/embed format. + +Why separate file? 
Make it easy to see how transformation works
+
+Covers
+- v3 embedding models
+- v2 embedding models
+
+Docs - https://docs.cohere.com/v2/reference/embed
+"""
+
+import types
+from typing import Any, List, Optional, Union
+
+import httpx
+
+from litellm import COHERE_DEFAULT_EMBEDDING_INPUT_TYPE
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.llms.bedrock import (
+    COHERE_EMBEDDING_INPUT_TYPES,
+    CohereEmbeddingRequest,
+    CohereEmbeddingRequestWithModel,
+)
+from litellm.types.utils import (
+    Embedding,
+    EmbeddingResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)
+from litellm.utils import is_base64_encoded
+
+
+class CohereEmbeddingConfig:
+    """
+    Reference: https://docs.cohere.com/v2/reference/embed
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return ["encoding_format"]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "encoding_format":
+                optional_params["embedding_types"] = v
+        return optional_params
+
+    def _is_v3_model(self, model: str) -> bool:
+        return "3" in model
+
+    def _transform_request(
+        self, model: str, input: List[str], inference_params: dict
+    ) -> CohereEmbeddingRequestWithModel:
+        # treat this as an image request if any input string is a b64-encoded image
+        is_encoded = any(is_base64_encoded(input_str) for input_str in input)
+
+        if is_encoded:
+            transformed_request = CohereEmbeddingRequestWithModel(
+                model=model,
+                images=input,
+                input_type="image",
+            )
+        else:
+            transformed_request = CohereEmbeddingRequestWithModel(
+                model=model,
+                texts=input,
+                input_type=COHERE_DEFAULT_EMBEDDING_INPUT_TYPE,
+            )
+
+        for k, v in inference_params.items():
+            transformed_request[k] = v  # type: ignore
+
+        return transformed_request
+
+    def _calculate_usage(self, input: List[str], encoding: Any, meta: dict) -> Usage:
+
+        input_tokens = 0
+
+        text_tokens: Optional[int] = meta.get("billed_units", {}).get("input_tokens")
+
+        image_tokens: Optional[int] = meta.get("billed_units", {}).get("images")
+
+        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        if image_tokens is None and text_tokens is None:
+            for text in input:
+                input_tokens += len(encoding.encode(text))
+        else:
+            prompt_tokens_details = PromptTokensDetailsWrapper(
+                image_tokens=image_tokens,
+                text_tokens=text_tokens,
+            )
+            if image_tokens:
+                input_tokens += image_tokens
+            if text_tokens:
+                input_tokens += text_tokens
+
+        return Usage(
+            prompt_tokens=input_tokens,
+            completion_tokens=0,
+            total_tokens=input_tokens,
+            prompt_tokens_details=prompt_tokens_details,
+        )
+
+    def _transform_response(
+        self,
+        response: httpx.Response,
+        api_key: Optional[str],
+        logging_obj: LiteLLMLoggingObj,
+        data: Union[dict, CohereEmbeddingRequest],
+        model_response: EmbeddingResponse,
+        model: str,
+        encoding: Any,
+        input: list,
+    ) -> EmbeddingResponse:
+
+        response_json = response.json()
+        ## LOGGING
+        logging_obj.post_call(
+            input=input,
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=response_json,
+        )
+        """
+        response
+        {
+            'object': "list",
+            'data': [
+
+            ]
+            'model',
+            'usage'
+        }
+        """
+        embeddings = response_json["embeddings"]
+        output_data = []
+        for idx, embedding in enumerate(embeddings):
+            output_data.append(
+                {"object": "embedding", "index": idx, "embedding": embedding}
+            )
+        model_response.object = "list"
+        model_response.data = output_data
+        model_response.model = 
model + input_tokens = 0 + for text in input: + input_tokens += len(encoding.encode(text)) + + setattr( + model_response, + "usage", + self._calculate_usage(input, encoding, response_json.get("meta", {})), + ) + + return model_response diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index 89b294584..55851a636 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -152,8 +152,10 @@ class AsyncHTTPHandler: setattr(e, "status_code", e.response.status_code) if stream is True: setattr(e, "message", await e.response.aread()) + setattr(e, "text", await e.response.aread()) else: setattr(e, "message", e.response.text) + setattr(e, "text", e.response.text) raise e except Exception as e: raise e diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 15ee85fae..ebfdd41d0 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -2429,6 +2429,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 contents: List[BedrockMessageBlock] = [] msg_i = 0 + ## BASE CASE ## + if len(messages) == 0: + raise litellm.BadRequestError( + message=BAD_MESSAGE_ERROR_STR + + "bedrock requires at least one non-system message", + model=model, + llm_provider=llm_provider, + ) + # if initial message is assistant message if messages[0].get("role") is not None and messages[0]["role"] == "assistant": if user_continue_message is not None: diff --git a/litellm/main.py b/litellm/main.py index f239d2612..f6680f2df 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -113,7 +113,7 @@ from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM from .llms.bedrock.embed.embedding import BedrockEmbedding from .llms.cohere import chat as cohere_chat from .llms.cohere import completion as cohere_completion # type: ignore -from .llms.cohere import embed as cohere_embed +from .llms.cohere.embed import handler as cohere_embed from .llms.custom_llm import CustomLLM, custom_chat_llm_router from .llms.databricks.chat import DatabricksChatCompletion from .llms.groq.chat.handler import GroqChatCompletion diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 890ef8688..fe8834dbb 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3364,54 +3364,56 @@ "litellm_provider": "cohere", "mode": "rerank" }, - "embed-english-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, - "input_cost_per_token": 0.00000010, - "output_cost_per_token": 0.00000, - "litellm_provider": "cohere", - "mode": "embedding" - }, "embed-english-light-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-light-v2.0": { - "max_tokens": 512, - "max_input_tokens": 
512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v2.0": { - "max_tokens": 256, - "max_input_tokens": 256, + "max_tokens": 768, + "max_input_tokens": 768, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, + "embed-english-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "input_cost_per_image": 0.0001, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding", + "supports_image_input": true + }, "replicate/meta/llama-2-13b": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 9ee547652..a34dffccd 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -238,11 +238,15 @@ class LiteLLMProxyRequestSetup: - Adds org id """ data = LitellmDataForBackendLLMCall() - _headers = LiteLLMProxyRequestSetup.add_headers_to_llm_call( - headers, user_api_key_dict - ) - if _headers != {}: - data["headers"] = _headers + if ( + general_settings + and general_settings.get("forward_client_headers_to_llm_api") is True + ): + _headers = LiteLLMProxyRequestSetup.add_headers_to_llm_call( + headers, user_api_key_dict + ) + if _headers != {}: + data["headers"] = _headers _organization = LiteLLMProxyRequestSetup.get_openai_org_id_from_headers( headers, general_settings ) diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py index 4fa0b06bb..737aac3c3 100644 --- a/litellm/types/llms/bedrock.py +++ b/litellm/types/llms/bedrock.py @@ -210,15 +210,23 @@ class ServerSentEvent: return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})" +COHERE_EMBEDDING_INPUT_TYPES = Literal[ + "search_document", "search_query", "classification", "clustering", "image" +] + + class CohereEmbeddingRequest(TypedDict, total=False): - texts: Required[List[str]] - input_type: Required[ - Literal["search_document", "search_query", "classification", "clustering"] - ] + texts: List[str] + images: List[str] + input_type: Required[COHERE_EMBEDDING_INPUT_TYPES] truncate: Literal["NONE", "START", "END"] embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"] +class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest): + model: Required[str] + + class CohereEmbeddingResponse(TypedDict): embeddings: List[List[float]] id: str diff --git a/litellm/utils.py b/litellm/utils.py index dc190bc1a..deb3ae8c6 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5197,7 +5197,9 @@ def create_proxy_transport_and_mounts(): def validate_environment( # noqa: PLR0915 - model: Optional[str] = None, api_key: Optional[str] = None + model: Optional[str] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, ) -> dict: """ Checks if the environment variables are valid for the given model. 
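The second `validate_environment` hunk below prunes `api_base`-style entries from `missing_keys` when a base URL is supplied directly. A minimal sketch of the intended behavior, mirroring the new test in `tests/local_testing/test_utils.py` (assumes no `OLLAMA_*` variables are exported):

```python
from litellm.utils import validate_environment

# Ollama models normally report OLLAMA_API_BASE as missing; passing
# api_base directly now satisfies that requirement, so nothing is left
# in missing_keys and keys_in_environment flips to True.
result = validate_environment(model="ollama/mistral", api_base="https://example.com")
assert result["keys_in_environment"] is True
assert result["missing_keys"] == []
```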
@@ -5224,11 +5226,6 @@ def validate_environment( # noqa: PLR0915 _, custom_llm_provider, _, _ = get_llm_provider(model=model) except Exception: custom_llm_provider = None - # # check if llm provider part of model name - # if model.split("/",1)[0] in litellm.provider_list: - # custom_llm_provider = model.split("/", 1)[0] - # model = model.split("/", 1)[1] - # custom_llm_provider_passed_in = True if custom_llm_provider: if custom_llm_provider == "openai": @@ -5497,6 +5494,17 @@ def validate_environment( # noqa: PLR0915 if "api_key" not in key.lower(): new_missing_keys.append(key) missing_keys = new_missing_keys + + if api_base is not None: + new_missing_keys = [] + for key in missing_keys: + if "api_base" not in key.lower(): + new_missing_keys.append(key) + missing_keys = new_missing_keys + + if len(missing_keys) == 0: # no missing keys + keys_in_environment = True + return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys} diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 890ef8688..fe8834dbb 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3364,54 +3364,56 @@ "litellm_provider": "cohere", "mode": "rerank" }, - "embed-english-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, - "input_cost_per_token": 0.00000010, - "output_cost_per_token": 0.00000, - "litellm_provider": "cohere", - "mode": "embedding" - }, "embed-english-light-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v3.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-light-v2.0": { - "max_tokens": 512, - "max_input_tokens": 512, + "max_tokens": 1024, + "max_input_tokens": 1024, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v2.0": { - "max_tokens": 256, - "max_input_tokens": 256, + "max_tokens": 768, + "max_input_tokens": 768, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000, "litellm_provider": "cohere", "mode": "embedding" }, + "embed-english-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "input_cost_per_image": 0.0001, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding", + "supports_image_input": true + }, "replicate/meta/llama-2-13b": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/tests/local_testing/test_embedding.py b/tests/local_testing/test_embedding.py index 4c7560ccc..7993d3280 100644 --- a/tests/local_testing/test_embedding.py +++ b/tests/local_testing/test_embedding.py @@ -1055,3 +1055,28 @@ def test_embedding_response_ratelimit_headers(model): assert int(additional_headers["x-ratelimit-remaining-requests"]) > 0 assert "x-ratelimit-remaining-tokens" in additional_headers assert int(additional_headers["x-ratelimit-remaining-tokens"]) > 0 + + 
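The parametrized test added below drives the new Cohere image-embedding path end-to-end. As a minimal sketch of the call it exercises (assumes `COHERE_API_KEY` is set; `img_b64` is a hypothetical stand-in for the data URL used in the test):

```python
from litellm import embedding

# A base64 data URL is detected via is_base64_encoded() and sent to Cohere
# as `images` with input_type="image"; plain strings still go out as `texts`.
img_b64 = "data:image/jpeg;base64,..."  # hypothetical placeholder, see test below
response = embedding(model="cohere/embed-english-v3.0", input=[img_b64])

# Cohere's billed_units meta is mapped onto usage.prompt_tokens_details.
print(response.usage.prompt_tokens_details.image_tokens)
```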
+@pytest.mark.parametrize( + "input, input_type", + [ + ( + [ + "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD//gAfQ29tcHJlc3NlZCBieSBqcGVnLXJlY29tcHJlc3P/2wCEAAQEBAQEBAQEBAQGBgUGBggHBwcHCAwJCQkJCQwTDA4MDA4MExEUEA8QFBEeFxUVFx4iHRsdIiolJSo0MjRERFwBBAQEBAQEBAQEBAYGBQYGCAcHBwcIDAkJCQkJDBMMDgwMDgwTERQQDxAUER4XFRUXHiIdGx0iKiUlKjQyNEREXP/CABEIAZABkAMBIgACEQEDEQH/xAAdAAEAAQQDAQAAAAAAAAAAAAAABwEFBggCAwQJ/9oACAEBAAAAAN/gAAAAAAAAAAAAAAAAAAAAAAAAAAHTg9j6agAAp23/ADjsAAAPFrlAUYeagAAArdZ12uzcAAKax6jWUAAAAO/bna+oAC1aBxAAAAAAbM7rVABYvnRgYAAAAAbwbIABw+cMYAAAAAAvH1CuwA091RAAAAAAbpbPAGJfMXzAAAAAAJk+hdQGlmsQAAAAABk31JqBx+V1iAAAAAALp9W6gRp826AAAAAAGS/UqoGuGjwAAAAAAl76I1A1K1EAAAAAAG5G1ADUHU0AAAAAAu/1Cu4DVbTgAAAAAA3n2JAIG0IAAAAAArt3toAMV+XfEAAAAAL1uzPlQBT5qR2AAAAAenZDbm/AAa06SgAAAAerYra/LQADp+YmIAAAAC77J7Q5KAACIPnjwAAAAzbZzY24gAAGq+m4AAA7Zo2cmaoAAANWdOOAAAMl2N2TysAAAApEOj2HgAOyYtl5w5jw4zZPJyuGQ5H2AAAdes+suDUAVyfYbZTLajG8HxjgD153n3IAABH8QxxiVo4XPKpGlyTKjowvCbUAF4mD3AAACgqCzYPiPQAA900XAACmN4favRk+a9wB0xdiNAAAvU1cgAxeDcUoPdL0s1B44atQAACSs8AEewD0gM72I5jjDFiAAAPfO1QGL6z9IAlGdRgkaAAABMmRANZsSADls7k6kFW8AAAJIz4DHtW6AAk+d1jhUAAAGdyWBFcGgAX/AGnYZFgAAAM4k4CF4hAA9u3FcKi4AAAEiSEBCsRgAe3biuGxWAAACXsoAiKFgALttgs0J0AAAHpnvkBhOt4AGebE1pBtsAAAGeySA4an2wAGwEjGFxaAAAe+c+wAjKBgAyfZ3kUh3HAAAO6Yb+AKQLGgBctmb2HXDNjAAD1yzkQAENRF1gyvYG9AcI2wjgAByyuSveAAWWMcQtnoyOQs8qAPFhVh8HADt999y65gAAKKgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAf/8QAGgEBAAMBAQEAAAAAAAAAAAAAAAEFBgIEA//aAAgBAhAAAAAAAAAAAAABEAAJkBEAAB0CIAABMhyAAA6EQAAA6EQAABMiIAAAmREAAAmQiAABMgOQAEyAHIATIACIBMu7H3fT419eACEnps7DoPFQch889Wd3V2TeWIBV0o+eF8I0OrXVoAIyvBm8uDe2Wp6ADO+Mw9WDV6rSgAzvjMNWA1Op1AARlvmZbOA3NnpfSAK6iHnwfnFttZ9Wh7AeXPcB5cxWd3Wk7Pvb+uR8q+rgAAAAAAAAP//EABsBAQABBQEAAAAAAAAAAAAAAAAEAQIDBQYH/9oACAEDEAAAAAAAAAC20AL6gCNDxAArnn3gpro4AAv2l4QIgAAJWwGLVAAAX7cQYYAAFdyNZgAAAy7UazAAABsZI18UAAE6YEfWgACRNygavCACsmZkALNZjAMkqVcAC2FFoKyJWe+fMyYoMAAUw2L8t0jYzqhE0dAzd70eHj+PK7mcAa7UDN7VvBwXmDb7EAU5uw9C9KCnh2n6WoAaKIey9ODy/jN+ADRRD2fpQeY8P0QAU5zGel+gg8V53oc4AgaYTfcJ45Tx5I31wCPobQ2PpPRYuP8APMZm2kqoxQddQAAAAAAAAP/EAFMQAAEDAgIDCQkMBwUIAwAAAAECAwQFEQAGBzFREhMhMEBBYXGBCBQYIjJCRlDSFSBSVGJygpGTobHREDRDc6LBwiMzU3CyFiQlNVVkdISSlLP/2gAIAQEAAT8A/wAo74nVaBAb32bNYitfDfcS2PrURiZpU0dwVFMjN1OVY8O8u7//APkFYc076LmfSVSvmQpB/ox4QGjH/r7v/wBGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0YH0ge7IMj2ceEBowPpA92QZHs48IDRgfSB7sgyPZx4QGjA+kD3ZBkezjwgNGB9IHuyDI9nHhAaMD6QPdkGR7OPCA0Y89fd7IMj2cN6e9GDpCTmRaOuFI9nEDSlo9qakpj5upoJNgH3d4+50JxGlxpbSH4r7bzSvJW0sLSeop5NWsw0fL8RU2rVGPDjJ4C6+4EAnYnaegYzV3StDhFcfK1LdqDuoSZBLDHWlPlqxXtNmkOulaVVxcFg3/sYA73A+kLrxKnTJrpfmSXX3jrcdWVqPWVYudvJ7nbil16s0R7vikVSVDduCVR3lNk9e5IvjKfdG5rpKmo+Yo7NXi8ALlgxJH0kiysZL0l5Uzsz/AMFn2l7m7kJ8BuSj6PnAbU8ieeZitOPPuoQ22krWtZCUpSkXJJOoDGkHui4MBT1MyW2ibITdJnuA97o/dJ1uHFczFXMyzV1Gu1N+bJV57yr7kbEjUkdA5dGlSYb7UqJIcZfaUFtuNLKFoUNRSocIONF3dBb6tih58eSCQEM1PUOqT7eELS4lK0KCkkAgg3BB4/M2Z6NlKlSKtWJiI8VoWueFS1nUhA85ZxpJ0v13Pj7kNorg0NC7tw0K4XNi3yPKPRqHqLQnpkeoD8XKmZZJVSHCG4klw/qijqQs/wCF/pwDfjc1ZqpOUKNLrVXf3qMyLJSLFbrh8ltA51qxn7P9az9V1z6istxWypMSIhRLbCD+Kj5yvUYJHCMdz7pLXWoByfWJBXUILV4bizwvRk+Z0qa4yoTodKgyZ859DEWO0t11xZslCEC5UrGlHSNOz/XVvBa26RFKkQY+xHO4v5a/UtArU3LlZptbpzm4lQ30ut7DbWk9ChwHGXq5EzHQ6ZWoCv8AdpsdDyRrIKtaFdKTwHi+6I0hrffGRKU/ZloodqSkngW5rQz1I1n1P3M2ZzJpFYyvIXdUJ0SowP8AhP8AAtI6AvitIWbWclZVqlbWElxpvcRmz+0kOcDaf5nEyXJnypM2Y8p2Q+6t11xRupa1m6lHpJ9T6B6uaVpHo7alEMz0PQnepxN0/wASRgauJ7pTNZmVynZTjuXZpzYkSRtkPDgB6UI9UZMlrgZsy1MQqxZqkRy/QHRfA4i
ZIaiRX5D6ghpptTi1bEIFycZmrL2YcwVitvk7ubLdfsfNClcCewcHqiiX91qbbX3yz/rGBxGmKse4ujnMz6F2dfjiGj/2VBs/ccE3J9UZOirm5ry3EQm5eqkRu3Qp0YHEd01PLGUqPT0mxk1QLV0oZaPteqdBtKNV0kUIkXah77Md6mkcH8RGBq4jupH7JyXG/wDPcP1tj1T3MuWVMQK5mt9FjJWmDGO1tHjuHqJ4nupEnvrJa+beZ4/jR6ooNGnZhrFOotNa3yXMeS02OvWo9CRwk4ytQIeWKDS6HC/V4TCWgq1itWtSz0rPCeJ7qKNenZSl2/upEtonpcShXqcC+NA+jFeW4H+1NbYKatOaswysWMaOrbscc4rujaYZuj/vzccMCpR3yehwFn+r1MAVGwGNDOhVbK4ubc4xLLFnYMB1PCNjrw/BHF58opzDk7MlHSndOSID28ja6gbtH3jChZRHqShZerOZag1S6JT3pcpzUhsahtUTwJTtJxow0G0vKRYreYS1PrIAUhNrx4yvkA+WsfCONXFnGlTLZytnqvU5KLRlvmTG2Fl/xwB0J1eookOXPkNRYUZ1991W5baaQVrWdiUi5JxkbudKzVCzOzg+abE196NWXKWOnWlvGW8p0DKMEU6g01qKzwFe5F1uEDynFnhUeO7pTJ5n0aBmyK3d+mneJVtZjOnxVfQX6ghwZtRktQ4EV6RJcNkNMoK1qOwJTcnGTe5yr9V3qXmuSKXFNj3uizkpY/0oxlbIOVslRt6oVKaZdIst9XjyHPnOK4ezkFVgw6vAmU2ewHYsllbDiFaloWNyoYz1lKZknMtRoEu6gyvdMO8zrC/IXy2j0Cs5glpg0WmyJkk+YwgrIG1WwdJxk7uap75amZyqQit6zChkLe6lueSnGWcl5ayjGEegUliKCAFuAbp5z57irqPI9NOjVOdqB31T2x7tU5KlxNryNa2CenWnDra2XFtOoUhaFFKkqFiCOAgg8qyro7zdnJwCh0Z5xi9lSVje46etarA22DGUe5spEPe5ebqgue78Ui3aj9Sl+WvFIodHoMREGj02PDjJ1NMNhAJ2m2s8m07aIHJi5WdMsxSZFiuoxG08LoGt9sDz/hjGrkzLD0hxDLDSluLISlKQSpRPMAMZU0C54zFvcidHTR4Sv2k24dI+SyPG+u2MqaBskZc3qRLimrzEftZoBaB+S0PFw0y2y2hppCUIQAEpSAAAOYAauU6XtBJmuycy5LjASVXcl05sWDu1bGxe1GHWnGXFtOoUhxCilSVAghSTYgg6iOR5eyfmXNT/AHvQKNJmKBspTaLNo+es2SntOMq9zNIc3uTm+sBoazEgWWvtdWLDGWchZTyk2E0KiR4zlrKkEbt9XW4u6uW6SNDNAzwHZ7BTTq3YkSm0XS7sS+ka/na8ZuyJmbJMwxK9T1NJJs1IR47D3S2vj2mXXlobabUtaiAlKRcknUAMZV0F56zJvT8iEKVCVY77PuhZHyWvLxlTuesl0Te3qqlysy08JMnxI4PQ0n+onEWDFhMNxokdphhsWQ20gIQkbEpFgPeyqnBg/rMhCCBfc3ur6hw4lZ1hNbpMdlbpGokhKT+OHs7zVf3EdpHzgVfzGDnGqnnbHUkYGcqqOZo/OT+VsMZ5eBG/w0K2lJKPaxDzfTJBCXFLZUTbxk3+q2GJTEhAcYdQtB1KSoEckqdLp1ThvQqnEZkxXU7lbLyAtCusKxnPubKVNU9NyhOMB03Pekm7kfsXwqRjM+jfOWUVLNZochEcapLY31gj56LgduLHZxNjjL+TM0ZpcDdCokuWL2LiEWaSflOKskYyt3M8t0tSM31hLCNZiwbLc7XVCwxljR9lHKDaRQ6Kww6BZUlQ32Qr6a7nAAHvFLSkEqUAAMT81UyGClDm/r2N6u1WKhm2oywpDKt4bPMjX/8ALC3HHCVLWSSbm+338adLhuB2O+tChzg4pOdOFDVRRbm31A/EflhiQ1IbS6y4laFaik3HJCkKBBAII4RjMOibIOYCtc/LkZD6tb0W8Zy+0luwVisdzDRX925RMyS4uxMtlD46gUFGKj3NWdY11wajSpbf71bS/qUnErQTpPjXIy2Xk7WZLCv68L0R6R2/KylO+ikK/A4Tom0jL1ZRqHa3bEXQjpPlkBGVXkDa48yj8V4p/c358lEGW/TIaOcOSCtfYG0qxSO5gp6AldczQ+9tbhsBr+NwqxRNDWjygFDjGXmpL4N99nEyVH6K/FGGmGY7SGm20oQgAJSkAJAHMAPeyJ8WEjfJD6EX1XP4DWTioZ1ZRdEBndnmWvgT2DE6tVCoE98SFFPMgGyR2DBN+E8XSq3MpToUyu7ZIK0HUcUmsRapGK46wlfBuknWnk5AOsY3I2YsNmLAagPf1HMFNp+6S68FOD9mjhV+QxUM5THrohJDKNutWHpL8halvOqWo6yokk8fT58inSESI6ylST2EbDtGKRU49VitvtkJI8tOsg7OOJA1nFSzhQKaVIkT21OA23DV3Fdu51Yk6VICCREpzznS4pKPw3WDpXk34KOgD9+fZwxpWB4JNIIG1D1/xTinaSMvylJDy3YyjwDfUXH1pviFPhTGw/FkNuoOpbagofdxU2fHhMqekOBDadus4q+bJcwqahkssfxnrOFKKjckk8iodWcpUxDySS2rgcTfWMMPtvstvNKCkLSFJI5weMzFm6mZfQUvL32UQCiOg+N1q2DFbzlWa2paXHyzGOplolKbfKOtWLnb72FUp9NeD8GU4y4OdBtfr2jGW9JTbqm4tdQlCr2D6fIPzxzYadbdQhxpYUlQBBBuCD7+pVKPTIq5D6uAcCUjWpWwYqtWlVV9Tr6yE6kIHkpHJcl1cqS5TXjfc+O3f7xxedc6IoqTAgEKnqHCdYZB5ztVsGH5D0p5x+Q6px1ZKlKUbknico5zk0J5EWWtTtPWeFOstdKejaMR5TMxhuQw4lbTiQpKkm4UD7151thtbriwlCElSidQAxXaw7VZalXsyglLadg/M8mpstcKbHko1oWDbb0duGXEOtIcQbpUkKB2g8Tm3MSMv0xbySDJduhhB+FtPQMSJD0p5yRIcK3XFFSlK1kni9HealU+UijzFjvZ5X9iVHyHDzdSve5yqqm2kU5pViuynCNnMOUZVld80lgKsVNEtns4QPqPEKNgTjOdbVWq0+tC7xmCWmRzWTrV2njEqUhQUkkEG4Ixk6ue7dFjPuuXeau08Plp5+0cP6VrS22pSiAACSdgGKpMXPnSJK/PWSBsHMOzlGRX/EmsW8koWOs3B4jONTNNoNQkIUUr3ve27awpzxb4PCTxujGpKYqkinKV4klvdJ+e3+nMkjvakS1DWtIb7FcB+7BNyTyjI67S5CDzsqP1EcRpUkqRTqfFBtvr6l9iE2/nx2V5XeeYKS9/3CEdizuD+OEm4/RnVak0+OhJtd256gm38+U5JTeY+rYyofeniNKyjv8AR0c24f8AxTx1NJTUYKhrD7Z/iGEeSP0Z63Pe8Xc6hur9dxynI7JtNeOqyAO0m/EaVv1mj/
Mf/FPHU7/mEL98j8cI8gfozq2pdOZWnmdseopJ5TlKIWKShZFi8tSz2eL/AC4jSsx/Y0qR8FbqD9IA8dQmFSK1S2UjypTQ7N0L4SLJ/RmOOJVIloSk+Ijdjb4nCcEWJB5PDjrlSWWGxdS1hI7TiHHRGjsso8htCUDqSLcRpDppl5ckLABXHUl8DYBwH7jx2juAZeYmXyk7iM2t07L23I/HA/QtIWkpULggjFXgqp8+RHINkrO5O0axyfJlLK3l1F1Pit3S3cecRr7BxMqM3IjusOpCkOoKVjakixGKzTXaTU5cB4HdNOEAnzk6we0cbo3o5g0hU91FnZhCh+7T5PvM6UjfWkTmE3W0LObSnmPZyanQHqjKajMjhUeE2uANpxAhNQYzTDabNtpsOk85PXxWkjLJmRk1mGjdPR0WdA85rb9HjMqUByv1Rtgg97N2W+vYjZ1qww02y2htCQlCEhKUjUAPeLQlxCkLAUlQsQdRBxmKiOUqWopSox1m6FHht0HkjDDsl1DLKCpajYAYoFFRSYw3dlSF8K1bPkji1JCgUkXBxnjJTlJecqVOZvCWbrQn9kT/AEniqVSplYmNQoTRW4s9iRzqUeYDGXaBFoFPbiMC6/KdctYrVt/Ie+qECNMjKjyE7oLHaOkYrVEkUl8hQKmVE7hY1HkUOFInPoYjtla1bMUDLzNKb3xyy5KvKXzDoTxrjaHEKQ4gKSoWIIuCDzYzTo5WlTk2ggEG6lxr6vmH+WHmXWHFtPNqQ4k2UlQIIOwg+/y/lCq19xKm2yzFv4z7g8X6I844oOXoFBiiPDb4TYuOny1kbTxEmOxKaVHebS4hXlA4rWTpEdSnqfdxu5JR5w6tuFtONKKXEFJBsQeOShSzZIvilZTnTShySCwyfhDxj1DFPpcSmtBuM0B8JR4VK6zyCr5apFaQROiJWsCwdT4qx1KGKloseG7XSp4UnmQ+LfxJxJyLmaMoj3OU4n4TakqwrLVfSbGjy/sV4ZyhmN/yKRI+kncf6rYhaM64+QZa2YyOk7tQ7E4o+jyiU0h2SgzHhzu+R2I/PCEIbASgAJAsAOLqFFp84HvphKlkCyhwK4OnZiXkcElUKV9Fz2hh/KdZataPuwfOSoEYXQqog2MJ49Taj/LHuNVPiEj7Jf5Y9xqp8QkfZL/LHuNVPiEj7Jf5Y9xqp8QkfZL/ACx7jVT4hI+yX+WPcaqfEJH2S/yx7jVT4hI+yX+WEUCquaoTw+chQ/EYYyjWHQSpgN9K1C33XOIuR0+VMlfRbH8ziFRKdTwksRkhY89XjK+/VyWwxYf5ef/EADgRAAIBAgMDCQUHBQAAAAAAAAECAwQRAAUgMUFhEhMhIjBAUXGREDJQU6EGFDNCYoGSUnKiwdH/2gAIAQIBAT8A+L37e/wE9zHfj3k90Gk90Gk9ztqPcbd3t3e3b2129qRySGyIScRZY56ZXtwGFoKZfyX8zj7rT/JX0w+X0zbFKngcTZdLHdozyx9cbOg9pbFtENJPNYqlh4nEOWxJYykufQYVFQWRQBw1VVGk4LKAJPHxwysjFWFiNUsscKGSVwqjecVOfgErSxX/AFNhs5r2P4oHkoxHndchHKZXHFf+YpM7gnISYc0/+J0KpYhVFycUtCkQDygM/huHZZjThl59R1l97iNMsqQxvLIbKoucV1dLWykkkRg9VdOUZmyOtLO10PQhO4+Hty6mCrz7jpPu+XZsoZSp2EEYkQxyOh/KSNGf1JAipVO3rNq2EHGW1P3mkikJ6w6reYxGpd0QbyBhVCqFGwC3aV4tUycbHRnLFq+UeAUfTX9nmJhqE3BwfUYoxeqi8+1ryDVPwA0ZwCMwm4hT9Nf2eB5qobcWUfTFM3Inib9Q7QkAEnYMSvzkrv4knRn8BEkVQB0Ecg+Y15RTmCij5Qsz9c/v7KWYTQo28dDefZ5hUBI+aU9Z9vAaamnSqheF9jD0OKmmlpZWilFiNh3Eacqy9quUSSLaFDc8T4YAt7KWpNPJfap94YR1kUOhuD2NTVJTr4vuGHdpHZ3NydVVSQVaciZfIjaMVOR1URJhtKvocNSVSmzU8gP9pxHQVkhASnf9xbFJkJuHq2Fv6F/2cIiRoqIoVQLADRBUSwG6Ho3g7DiLMYX6Huh9RgTwtslT1GOdi+YnqMc7F8xP5DHOxfMT+Qxz0XzE9Rh6ymTbKD5dOJsyY3WFbcThmZiWYkk7z8W//8QAOREAAgECAgYHBwMDBQAAAAAAAQIDAAQFERITICExkQYwQVFSYXEQFCJAQlOBMlChI4KSYnJzsbL/2gAIAQMBAT8A/YCyjiwFa2PxjnWtj8Y51rY/GOda2PxjnWtj8Y51rY/GOda2PxjnWtj8Y51rY/GOda2PxjnWtj8YoMp4EHq5LlV3LvNPNI/FuXW5kcDUdw6cd4pJFkGanbJABJqacvmq7l+RR2Rgy0jiRQw2rmXM6CncOPydq+T6B4HZmfQjJ7eA+UQ6LqfMbN229V/Pyg4j1GzcnOVvlIV0pFH52bgZSt8pbRaC6TcTs3YycHvHyQBJAFQ2+WTyfgbVymlHmOI+Rjt3fe3wio4kj4Df39RNGY38jw60AscgMzSWrHe5yFJEkfBd/f1UiLIpU1JG0ZyPVJE7/pWktRxc/gUqKgyVQOtZVcZMMxUlqw3pvHdRBU5EEbIBO4CktpG3t8IpLeNOzM+fsSN5DkikmosPY75Wy8hS2duv0Z+te7wfaXlT2Nu3BSvoalsJE3xnTH81vG49UVVtzAGjbRH6cq90TxGvdE8RoW0Q7M6Cqu5VA9kVrNLvC5DvNRWEa75CWPIUqqgyVQB5bVzarMCy7n7++mUoxVhkRtW9tPdypBbRNJI3BVFYf0FdlWTErnQP24uP5JqLojgUYyNqznvZ2q46GYLKDq0khPejk/8ArOsU6HX1irTWre8xDeQBk4/FHduPtALEKozJq3skjAaQaT/wOqv4NJdco3jj6bNtby3c8VtAulJIwVRWCYJb4PbKqqGnYDWSdpPcPLZ6V9HEmikxOxjAlQaUqL9Q7x5+2xgCrrmG8/p9OrIDAg8CKkTQd07iRsdBcPV3ucSkX9H9KP1O8naIBBBG410gsBh2K3MCDKNjrE/2tSLpuqDtIFKAqhRwA6y9GVw/mAdjohEEwK2I4u0jH/Lb6exgXljL2tEwP9pq0GdzF69bfHO4fyAGx0ScPgVpl9JkB/yO309cG6w9O0ROeZq3bQnib/UOsJyBJqV9ZI7952Ogl8DDdYezfEra1B5HcdvpTfC+xicoc44QIl/t4/z7LaUTRK3bwPr1d9PoJqlPxN/A2cOvpsNvIbyA/Eh3jvHaDWHYjbYnapdWzgg/qHap7js9JseTDLZreBwbuVSAB9AP1GiSSSeJ9ltcGB8/pPEUjq6hlOYPU3FykC97dgp3aRi7HMnaw3FbzCptdaSZeJDvVh5isO6aYdcqq3gNvJ25705ikxXDJAGS/gI/5FqfHMIt10pb+H0DBjyGdYr03XRaLCojnw1sg/6FTTSzyPNNIXkc5szHMnYhuJIDmh3do
PCo7+F9z5oaE0R4SrzrWR/cXnWsj+4vOtZH9xeYrWx/cXmKe6gTjID6b6lxAnMQrl5mmYsSzEkn92//2Q==" + ], + "image", + ), + (["hello world"], "text"), + ], +) +def test_cohere_img_embeddings(input, input_type): + litellm.set_verbose = True + response = embedding( + model="cohere/embed-english-v3.0", + input=input, + ) + + if input_type == "image": + assert response.usage.prompt_tokens_details.image_tokens > 0 + else: + assert response.usage.prompt_tokens_details.text_tokens > 0 diff --git a/tests/local_testing/test_get_llm_provider.py b/tests/local_testing/test_get_llm_provider.py index e72373805..f7126cec0 100644 --- a/tests/local_testing/test_get_llm_provider.py +++ b/tests/local_testing/test_get_llm_provider.py @@ -160,3 +160,12 @@ def test_get_llm_provider_jina_ai(): assert custom_llm_provider == "openai_like" assert api_base == "https://api.jina.ai/v1" assert model == "jina-embeddings-v3" + + +def test_get_llm_provider_hosted_vllm(): + model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider( + model="hosted_vllm/llama-3.1-70b-instruct", + ) + assert custom_llm_provider == "hosted_vllm" + assert model == "llama-3.1-70b-instruct" + assert dynamic_api_key == "" diff --git a/tests/local_testing/test_prompt_factory.py b/tests/local_testing/test_prompt_factory.py index 74e7cefa5..7b4e295ce 100644 --- a/tests/local_testing/test_prompt_factory.py +++ b/tests/local_testing/test_prompt_factory.py @@ -675,3 +675,15 @@ def test_alternating_roles_e2e(): "stream": False, } ) + + +def test_just_system_message(): + from litellm.llms.prompt_templates.factory import _bedrock_converse_messages_pt + + with pytest.raises(litellm.BadRequestError) as e: + _bedrock_converse_messages_pt( + messages=[], + model="anthropic.claude-3-sonnet-20240229-v1:0", + llm_provider="bedrock", + ) + assert "bedrock requires at least one non-system message" in str(e.value) diff --git a/tests/local_testing/test_proxy_server.py b/tests/local_testing/test_proxy_server.py index 803243557..51ec085ba 100644 --- a/tests/local_testing/test_proxy_server.py +++ b/tests/local_testing/test_proxy_server.py @@ -225,12 +225,20 @@ def test_add_headers_to_request(litellm_key_header_name): "litellm_key_header_name", ["x-litellm-key", None], ) +@pytest.mark.parametrize( + "forward_headers", + [True, False], +) @mock_patch_acompletion() def test_chat_completion_forward_headers( - mock_acompletion, client_no_auth, litellm_key_header_name + mock_acompletion, client_no_auth, litellm_key_header_name, forward_headers ): global headers try: + if forward_headers: + gs = getattr(litellm.proxy.proxy_server, "general_settings") + gs["forward_client_headers_to_llm_api"] = True + setattr(litellm.proxy.proxy_server, "general_settings", gs) if litellm_key_header_name is not None: gs = getattr(litellm.proxy.proxy_server, "general_settings") gs["litellm_key_header_name"] = litellm_key_header_name @@ -260,23 +268,14 @@ def test_chat_completion_forward_headers( response = client_no_auth.post( "/v1/chat/completions", json=test_data, headers=received_headers ) - mock_acompletion.assert_called_once_with( - model="gpt-3.5-turbo", - messages=[ - {"role": "user", "content": "hi"}, - ], - max_tokens=10, - litellm_call_id=mock.ANY, - litellm_logging_obj=mock.ANY, - request_timeout=mock.ANY, - specific_deployment=True, - metadata=mock.ANY, - proxy_server_request=mock.ANY, - headers={ + if not forward_headers: + assert "headers" not in mock_acompletion.call_args.kwargs + else: + assert mock_acompletion.call_args.kwargs["headers"] == { "x-custom-header": 
"Custom-Value", "x-another-header": "Another-Value", - }, - ) + } + print(f"response - {response.text}") assert response.status_code == 200 result = response.json() diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py index 9c26da614..3558f88bc 100644 --- a/tests/local_testing/test_utils.py +++ b/tests/local_testing/test_utils.py @@ -331,6 +331,13 @@ def test_validate_environment_api_key(): ), f"Missing keys={response_obj['missing_keys']}" +def test_validate_environment_api_base_dynamic(): + for provider in ["ollama", "ollama_chat"]: + kv = validate_environment(provider + "/mistral", api_base="https://example.com") + assert kv["keys_in_environment"] + assert kv["missing_keys"] == [] + + @mock.patch.dict(os.environ, {"OLLAMA_API_BASE": "foo"}, clear=True) def test_validate_environment_ollama(): for provider in ["ollama", "ollama_chat"]: