mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
fix(bedrock.py): fix output format for cohere embeddings
This commit is contained in:
parent
7ddc0dad24
commit
c1e95740b0
4 changed files with 40 additions and 14 deletions
|
@ -552,8 +552,8 @@ def _embedding_func_single(
|
||||||
## FORMAT EMBEDDING INPUT ##
|
## FORMAT EMBEDDING INPUT ##
|
||||||
provider = model.split(".")[0]
|
provider = model.split(".")[0]
|
||||||
inference_params = copy.deepcopy(optional_params)
|
inference_params = copy.deepcopy(optional_params)
|
||||||
input = input.replace(os.linesep, " ")
|
|
||||||
if provider == "amazon":
|
if provider == "amazon":
|
||||||
|
input = input.replace(os.linesep, " ")
|
||||||
data = {"inputText": input, **inference_params}
|
data = {"inputText": input, **inference_params}
|
||||||
# data = json.dumps(data)
|
# data = json.dumps(data)
|
||||||
elif provider == "cohere":
|
elif provider == "cohere":
|
||||||
|
@ -590,7 +590,10 @@ def _embedding_func_single(
|
||||||
original_response=response_body,
|
original_response=response_body,
|
||||||
)
|
)
|
||||||
if provider == "cohere":
|
if provider == "cohere":
|
||||||
return response_body.get("embeddings")
|
response = response_body.get("embeddings")
|
||||||
|
# flatten list
|
||||||
|
response = [item for sublist in response for item in sublist]
|
||||||
|
return response
|
||||||
elif provider == "amazon":
|
elif provider == "amazon":
|
||||||
return response_body.get("embedding")
|
return response_body.get("embedding")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -1775,16 +1775,20 @@ def embedding(
|
||||||
rpm = kwargs.pop("rpm", None)
|
rpm = kwargs.pop("rpm", None)
|
||||||
tpm = kwargs.pop("tpm", None)
|
tpm = kwargs.pop("tpm", None)
|
||||||
aembedding = kwargs.pop("aembedding", None)
|
aembedding = kwargs.pop("aembedding", None)
|
||||||
|
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries", "encoding_format"]
|
||||||
|
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name"]
|
||||||
|
default_params = openai_params + litellm_params
|
||||||
|
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
||||||
optional_params = {}
|
optional_params = {}
|
||||||
for param in kwargs:
|
for param in non_default_params:
|
||||||
if param != "metadata": # filter out metadata from optional_params
|
optional_params[param] = kwargs[param]
|
||||||
optional_params[param] = kwargs[param]
|
|
||||||
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base, api_key=api_key)
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base, api_key=api_key)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = None
|
response = None
|
||||||
logging = litellm_logging_obj
|
logging = litellm_logging_obj
|
||||||
logging.update_environment_variables(model=model, user="", optional_params={}, litellm_params={"timeout": timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn})
|
logging.update_environment_variables(model=model, user="", optional_params=optional_params, litellm_params={"timeout": timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn})
|
||||||
if azure == True or custom_llm_provider == "azure":
|
if azure == True or custom_llm_provider == "azure":
|
||||||
# azure configs
|
# azure configs
|
||||||
api_type = get_secret("AZURE_API_TYPE") or "azure"
|
api_type = get_secret("AZURE_API_TYPE") or "azure"
|
||||||
|
@ -1903,7 +1907,7 @@ def embedding(
|
||||||
input=input,
|
input=input,
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
logging_obj=logging,
|
logging_obj=logging,
|
||||||
optional_params=kwargs,
|
optional_params=optional_params,
|
||||||
model_response= EmbeddingResponse()
|
model_response= EmbeddingResponse()
|
||||||
)
|
)
|
||||||
elif custom_llm_provider == "sagemaker":
|
elif custom_llm_provider == "sagemaker":
|
||||||
|
@ -1912,7 +1916,7 @@ def embedding(
|
||||||
input=input,
|
input=input,
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
logging_obj=logging,
|
logging_obj=logging,
|
||||||
optional_params=kwargs,
|
optional_params=optional_params,
|
||||||
model_response= EmbeddingResponse(),
|
model_response= EmbeddingResponse(),
|
||||||
print_verbose=print_verbose
|
print_verbose=print_verbose
|
||||||
)
|
)
|
||||||
|
|
|
@ -989,6 +989,7 @@ async def embeddings(request: Request, user_api_key_dict: UserAPIKeyAuth = Depen
|
||||||
body = await request.body()
|
body = await request.body()
|
||||||
data = orjson.loads(body)
|
data = orjson.loads(body)
|
||||||
|
|
||||||
|
|
||||||
data["user"] = user_api_key_dict.user_id
|
data["user"] = user_api_key_dict.user_id
|
||||||
data["model"] = (
|
data["model"] = (
|
||||||
general_settings.get("embedding_model", None) # server default
|
general_settings.get("embedding_model", None) # server default
|
||||||
|
@ -1001,9 +1002,24 @@ async def embeddings(request: Request, user_api_key_dict: UserAPIKeyAuth = Depen
|
||||||
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
||||||
else:
|
else:
|
||||||
data["metadata"] = {"user_api_key": user_api_key_dict.api_key}
|
data["metadata"] = {"user_api_key": user_api_key_dict.api_key}
|
||||||
|
router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else []
|
||||||
|
print(f"received data: {data['input']}")
|
||||||
|
if "input" in data and isinstance(data['input'], list) and isinstance(data['input'][0], list) and isinstance(data['input'][0][0], int): # check if array of tokens passed in
|
||||||
|
# check if non-openai/azure model called - e.g. for langchain integration
|
||||||
|
if data["model"] in router_model_names:
|
||||||
|
for m in llm_model_list:
|
||||||
|
if m["model_name"] == data["model"] and (m["litellm_params"]["model"] in litellm.open_ai_embedding_models
|
||||||
|
or m["litellm_params"]["model"].startswith("azure/")):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# non-openai/azure embedding model called with token input
|
||||||
|
input_list = []
|
||||||
|
for i in data["input"]:
|
||||||
|
input_list.append(litellm.decode(model="gpt-3.5-turbo", tokens=i))
|
||||||
|
data["input"] = input_list
|
||||||
|
break
|
||||||
|
|
||||||
## ROUTE TO CORRECT ENDPOINT ##
|
## ROUTE TO CORRECT ENDPOINT ##
|
||||||
router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else []
|
|
||||||
if llm_router is not None and data["model"] in router_model_names: # model in router model list
|
if llm_router is not None and data["model"] in router_model_names: # model in router model list
|
||||||
response = await llm_router.aembedding(**data)
|
response = await llm_router.aembedding(**data)
|
||||||
elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
|
elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
|
||||||
|
|
|
@ -161,20 +161,23 @@ def test_bedrock_embedding_titan():
|
||||||
print(f"response:", response)
|
print(f"response:", response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
test_bedrock_embedding_titan()
|
# test_bedrock_embedding_titan()
|
||||||
|
|
||||||
def test_bedrock_embedding_cohere():
|
def test_bedrock_embedding_cohere():
|
||||||
try:
|
try:
|
||||||
# litellm.set_verbose=True
|
litellm.set_verbose=False
|
||||||
response = embedding(
|
response = embedding(
|
||||||
model="cohere.embed-multilingual-v3", input=["good morning from litellm, attempting to embed data", "lets test a second string for good measure"],
|
model="cohere.embed-multilingual-v3", input=["good morning from litellm, attempting to embed data", "lets test a second string for good measure"],
|
||||||
aws_region_name="os.environ/AWS_REGION_NAME_2"
|
aws_region_name="os.environ/AWS_REGION_NAME_2"
|
||||||
)
|
)
|
||||||
|
assert isinstance(response['data'][0]['embedding'], list), "Expected response to be a list"
|
||||||
|
print(f"type of first embedding:", type(response['data'][0]['embedding'][0]))
|
||||||
|
assert all(isinstance(x, float) for x in response['data'][0]['embedding']), "Expected response to be a list of floats"
|
||||||
# print(f"response:", response)
|
# print(f"response:", response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
# test_bedrock_embedding_cohere()
|
test_bedrock_embedding_cohere()
|
||||||
|
|
||||||
# comment out hf tests - since hf endpoints are unstable
|
# comment out hf tests - since hf endpoints are unstable
|
||||||
def test_hf_embedding():
|
def test_hf_embedding():
|
||||||
|
@ -234,7 +237,7 @@ def test_sagemaker_embeddings():
|
||||||
print(f"response: {response}")
|
print(f"response: {response}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
test_sagemaker_embeddings()
|
# test_sagemaker_embeddings()
|
||||||
# def local_proxy_embeddings():
|
# def local_proxy_embeddings():
|
||||||
# litellm.set_verbose=True
|
# litellm.set_verbose=True
|
||||||
# response = embedding(
|
# response = embedding(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue