litellm-mirror/litellm/llms/watsonx/embed/transformation.py
Krish Dholakia 76795dba39
Deepseek r1 support + watsonx qa improvements (#7907)
* fix(types/utils.py): support returning 'reasoning_content' for deepseek models

Fixes https://github.com/BerriAI/litellm/issues/7877#issuecomment-2603813218

* fix(convert_dict_to_response.py): return deepseek response in provider_specific_field

allows for separating openai vs. non-openai params in model response

* fix(utils.py): support 'provider_specific_field' in delta chunk as well

allows deepseek reasoning content chunk to be returned to user from stream as well

Fixes https://github.com/BerriAI/litellm/issues/7877#issuecomment-2603813218

* fix(watsonx/chat/handler.py): fix passing space id to watsonx on chat route

* fix(watsonx/): fix watsonx_text/ route with space id

* fix(watsonx/): qa item - also adds better unit testing for watsonx embedding calls

* fix(utils.py): rename to '..fields'

* fix: fix linting errors

* fix(utils.py): fix typing - don't show provider-specific field if none or empty - prevents default respons
e from being non-oai compatible

* fix: cleanup unused imports

* docs(deepseek.md): add docs for deepseek reasoning model
2025-01-21 23:13:15 -08:00

111 lines
3.3 KiB
Python

"""
Translates from OpenAI's `/v1/embeddings` to IBM's `/text/embeddings` route.
"""
from typing import Optional
import httpx
from litellm.llms.base_llm.embedding.transformation import (
BaseEmbeddingConfig,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import AllEmbeddingInputValues
from litellm.types.llms.watsonx import WatsonXAIEndpoint
from litellm.types.utils import EmbeddingResponse, Usage
from ..common_utils import IBMWatsonXMixin, _get_api_params
class IBMWatsonXEmbeddingConfig(IBMWatsonXMixin, BaseEmbeddingConfig):
def get_supported_openai_params(self, model: str) -> list:
return []
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
return optional_params
def transform_embedding_request(
self,
model: str,
input: AllEmbeddingInputValues,
optional_params: dict,
headers: dict,
) -> dict:
watsonx_api_params = _get_api_params(params=optional_params)
watsonx_auth_payload = self._prepare_payload(
model=model,
api_params=watsonx_api_params,
)
return {
"inputs": input,
"parameters": optional_params,
**watsonx_auth_payload,
}
def get_complete_url(
self,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
) -> str:
url = self._get_base_url(api_base=api_base)
endpoint = WatsonXAIEndpoint.EMBEDDINGS.value
if model.startswith("deployment/"):
deployment_id = "/".join(model.split("/")[1:])
endpoint = endpoint.format(deployment_id=deployment_id)
url = url.rstrip("/") + endpoint
## add api version
url = self._add_api_version_to_url(
url=url, api_version=optional_params.pop("api_version", None)
)
return url
def transform_embedding_response(
self,
model: str,
raw_response: httpx.Response,
model_response: EmbeddingResponse,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str],
request_data: dict,
optional_params: dict,
litellm_params: dict,
) -> EmbeddingResponse:
logging_obj.post_call(
original_response=raw_response.text,
)
json_resp = raw_response.json()
if model_response is None:
model_response = EmbeddingResponse(model=json_resp.get("model_id", None))
results = json_resp.get("results", [])
embedding_response = []
for idx, result in enumerate(results):
embedding_response.append(
{
"object": "embedding",
"index": idx,
"embedding": result["embedding"],
}
)
model_response.object = "list"
model_response.data = embedding_response
input_tokens = json_resp.get("input_token_count", 0)
setattr(
model_response,
"usage",
Usage(
prompt_tokens=input_tokens,
completion_tokens=0,
total_tokens=input_tokens,
),
)
return model_response