forked from phoenix/litellm-mirror
(feat) 10x faster embeddings
This commit is contained in:
parent
6738044170
commit
23466107a7
1 changed file with 5 additions and 16 deletions
|
@@ -309,8 +309,8 @@ class OpenAIChatCompletion(BaseLLM):
|
||||||
timeout: float,
|
timeout: float,
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
api_base: Optional[str] = None,
|
api_base: Optional[str] = None,
|
||||||
|
model_response: litellm.utils.EmbeddingResponse = None,
|
||||||
logging_obj=None,
|
logging_obj=None,
|
||||||
model_response=None,
|
|
||||||
optional_params=None,
|
optional_params=None,
|
||||||
):
|
):
|
||||||
super().embedding()
|
super().embedding()
|
||||||
|
@@ -342,21 +342,10 @@ class OpenAIChatCompletion(BaseLLM):
|
||||||
additional_args={"complete_input_dict": data},
|
additional_args={"complete_input_dict": data},
|
||||||
original_response=response,
|
original_response=response,
|
||||||
)
|
)
|
||||||
|
model_response.data = response.data
|
||||||
embedding_response = json.loads(response.model_dump_json())
|
model_response.model = model
|
||||||
output_data = []
|
model_response.usage = response.usage
|
||||||
for idx, embedding in enumerate(embedding_response["data"]):
|
model_response.object = "list"
|
||||||
output_data.append(
|
|
||||||
{
|
|
||||||
"object": embedding["object"],
|
|
||||||
"index": embedding["index"],
|
|
||||||
"embedding": embedding["embedding"]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
model_response["object"] = "list"
|
|
||||||
model_response["data"] = output_data
|
|
||||||
model_response["model"] = model
|
|
||||||
model_response["usage"] = embedding_response["usage"]
|
|
||||||
return model_response
|
return model_response
|
||||||
except OpenAIError as e:
|
except OpenAIError as e:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue