diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 8b05f0153f..9b65104006 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -552,8 +552,8 @@ def _embedding_func_single( ## FORMAT EMBEDDING INPUT ## provider = model.split(".")[0] inference_params = copy.deepcopy(optional_params) - input = input.replace(os.linesep, " ") if provider == "amazon": + input = input.replace(os.linesep, " ") data = {"inputText": input, **inference_params} # data = json.dumps(data) elif provider == "cohere": @@ -590,7 +590,10 @@ def _embedding_func_single( original_response=response_body, ) if provider == "cohere": - return response_body.get("embeddings") + response = response_body.get("embeddings") + # flatten list + response = [item for sublist in response for item in sublist] + return response elif provider == "amazon": return response_body.get("embedding") except Exception as e: diff --git a/litellm/main.py b/litellm/main.py index a8b499af44..6d7fc34039 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1775,16 +1775,20 @@ def embedding( rpm = kwargs.pop("rpm", None) tpm = kwargs.pop("tpm", None) aembedding = kwargs.pop("aembedding", None) - + openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries", "encoding_format"] + litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name"] + default_params = openai_params + litellm_params + non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider optional_params = {} - for param in kwargs: - if param != "metadata": # filter out metadata from optional_params - optional_params[param] = kwargs[param] + for param in non_default_params: + optional_params[param] = kwargs[param] model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base, api_key=api_key) + + try: response = None logging = litellm_logging_obj - logging.update_environment_variables(model=model, user="", optional_params={}, litellm_params={"timeout": timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn}) + logging.update_environment_variables(model=model, user="", optional_params=optional_params, litellm_params={"timeout": timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn}) if azure == True or custom_llm_provider == "azure": # azure configs api_type = get_secret("AZURE_API_TYPE") or "azure" @@ -1903,7 +1907,7 @@ def embedding( input=input, encoding=encoding, logging_obj=logging, - optional_params=kwargs, + optional_params=optional_params, model_response= EmbeddingResponse() ) elif custom_llm_provider == "sagemaker": @@ -1912,7 +1916,7 @@ def embedding( input=input, encoding=encoding, logging_obj=logging, - optional_params=kwargs, + optional_params=optional_params, model_response= EmbeddingResponse(), print_verbose=print_verbose ) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index bff4d5daae..2b0a36934f 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -989,6 +989,7 @@ async def embeddings(request: Request, user_api_key_dict: UserAPIKeyAuth = Depen body = await request.body() data = orjson.loads(body) + data["user"] = user_api_key_dict.user_id data["model"] = ( general_settings.get("embedding_model", None) # server default @@ -1001,9 +1002,24 @@ async def embeddings(request: Request, user_api_key_dict: UserAPIKeyAuth = Depen data["metadata"]["user_api_key"] = user_api_key_dict.api_key else: data["metadata"] = {"user_api_key": user_api_key_dict.api_key} + router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else [] + print(f"received data: {data['input']}") + if "input" in data and isinstance(data['input'], list) and isinstance(data['input'][0], list) and isinstance(data['input'][0][0], int): # check if array of tokens passed in + # check if non-openai/azure model called - e.g. for langchain integration + if data["model"] in router_model_names: + for m in llm_model_list: + if m["model_name"] == data["model"] and (m["litellm_params"]["model"] in litellm.open_ai_embedding_models + or m["litellm_params"]["model"].startswith("azure/")): + pass + else: + # non-openai/azure embedding model called with token input + input_list = [] + for i in data["input"]: + input_list.append(litellm.decode(model="gpt-3.5-turbo", tokens=i)) + data["input"] = input_list + break ## ROUTE TO CORRECT ENDPOINT ## - router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else [] if llm_router is not None and data["model"] in router_model_names: # model in router model list response = await llm_router.aembedding(**data) elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index bca0887ae3..5361d19277 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -161,20 +161,23 @@ def test_bedrock_embedding_titan(): print(f"response:", response) except Exception as e: pytest.fail(f"Error occurred: {e}") -test_bedrock_embedding_titan() +# test_bedrock_embedding_titan() def test_bedrock_embedding_cohere(): try: - # litellm.set_verbose=True + litellm.set_verbose=False response = embedding( model="cohere.embed-multilingual-v3", input=["good morning from litellm, attempting to embed data", "lets test a second string for good measure"], aws_region_name="os.environ/AWS_REGION_NAME_2" ) + assert isinstance(response['data'][0]['embedding'], list), "Expected response to be a list" + print(f"type of first embedding:", type(response['data'][0]['embedding'][0])) + assert all(isinstance(x, float) for x in response['data'][0]['embedding']), "Expected response to be a list of floats" # print(f"response:", response) except Exception as e: pytest.fail(f"Error occurred: {e}") -# test_bedrock_embedding_cohere() +test_bedrock_embedding_cohere() # comment out hf tests - since hf endpoints are unstable def test_hf_embedding(): @@ -234,7 +237,7 @@ def test_sagemaker_embeddings(): print(f"response: {response}") except Exception as e: pytest.fail(f"Error occurred: {e}") -test_sagemaker_embeddings() +# test_sagemaker_embeddings() # def local_proxy_embeddings(): # litellm.set_verbose=True # response = embedding(