mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
* test: add initial e2e test * fix(vertex_ai/files): initial commit adding sync file create support * refactor: initial commit of vertex ai non-jsonl files reaching gcp endpoint * fix(vertex_ai/files/transformation.py): initial working commit of non-jsonl file call reaching backend endpoint * fix(vertex_ai/files/transformation.py): working e2e non-jsonl file upload * test: working e2e jsonl call * test: unit testing for jsonl file creation * fix(vertex_ai/transformation.py): reset file pointer after read allow multiple reads on same file object * fix: fix linting errors * fix: fix ruff linting errors * fix: fix import * fix: fix linting error * fix: fix linting error * fix(vertex_ai/files/transformation.py): fix linting error * test: update test * test: update tests * fix: fix linting errors * fix: fix test * fix: fix linting error
124 lines
3.5 KiB
Python
124 lines
3.5 KiB
Python
from typing import List, Optional, Union
|
|
|
|
import httpx
|
|
|
|
from litellm.llms.base_llm.chat.transformation import AllMessageValues, BaseLLMException
|
|
from litellm.llms.base_llm.embedding.transformation import (
|
|
BaseEmbeddingConfig,
|
|
LiteLLMLoggingObj,
|
|
)
|
|
from litellm.types.llms.openai import AllEmbeddingInputValues
|
|
from litellm.types.utils import EmbeddingResponse
|
|
|
|
from ..common_utils import TritonError
|
|
|
|
|
|
class TritonEmbeddingConfig(BaseEmbeddingConfig):
    """
    Request/response transformations for Triton's /embeddings endpoint
    (targets a trtllm-served embedding model speaking the KServe v2 protocol).
    """

    def __init__(self) -> None:
        pass

    def get_supported_openai_params(self, model: str) -> list:
        """No OpenAI embedding params are mappable onto Triton — empty list."""
        return []

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map OpenAI params to Triton Embedding params.

        Nothing to translate today, so the optional params pass through untouched.
        """
        return optional_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Triton needs no auth headers here — return an empty header dict."""
        return {}

    def transform_embedding_request(
        self,
        model: str,
        input: AllEmbeddingInputValues,
        optional_params: dict,
        headers: dict,
    ) -> dict:
        """
        Wrap the raw input in Triton's KServe v2 inference-request envelope:
        a single BYTES tensor named "input_text" whose shape is the input length.
        """
        text_tensor = {
            "name": "input_text",
            "shape": [len(input)],
            "datatype": "BYTES",
            "data": input,
        }
        return {"inputs": [text_tensor]}

    def transform_embedding_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        request_data: dict = {},
        optional_params: dict = {},
        litellm_params: dict = {},
    ) -> EmbeddingResponse:
        """
        Convert a Triton KServe v2 inference response into an OpenAI-style
        EmbeddingResponse. Raises TritonError when the body is not valid JSON.
        """
        try:
            response_json = raw_response.json()
        except Exception:
            # Non-JSON body — surface it verbatim with the HTTP status.
            raise TritonError(
                message=raw_response.text, status_code=raw_response.status_code
            )

        embedding_objects: list = []

        for result in response_json["outputs"]:
            # Each output tensor carries a flat data array plus its 2-D shape;
            # split it into per-row embedding vectors.
            vectors = self.split_embedding_by_shape(result["data"], result["shape"])
            # NOTE: the index restarts at 0 for every output tensor, matching
            # the upstream behavior.
            embedding_objects.extend(
                {
                    "object": "embedding",
                    "index": position,
                    "embedding": vector,
                }
                for position, vector in enumerate(vectors)
            )

        model_response.model = response_json.get("model_name", "None")
        model_response.data = embedding_objects
        return model_response

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """All provider errors are reported as TritonError."""
        return TritonError(
            message=error_message, status_code=status_code, headers=headers
        )

    @staticmethod
    def split_embedding_by_shape(
        data: List[float], shape: List[int]
    ) -> List[List[float]]:
        """
        Slice a flat float array into rows according to a 2-D shape
        [num_rows, embedding_size]. Raises ValueError for non-2-D shapes.
        """
        if len(shape) != 2:
            raise ValueError("Shape must be of length 2.")
        row_width = shape[1]
        return [
            data[row * row_width : row * row_width + row_width]
            for row in range(shape[0])
        ]
|