Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
(feat) add support for echo for HF logprobs

parent 847de09308
commit ce462824be

2 changed files with 16 additions and 2 deletions
@@ -1792,11 +1792,21 @@ def text_completion(*args, **kwargs):
     if kwargs["model"] in litellm.open_ai_text_completion_models and response._hidden_params.get("original_response", None) != None:
         return response._hidden_params.get("original_response", None)
     transformed_logprobs = None
+    # only supported for TGI models
     try:
         raw_response = response._hidden_params.get("original_response", None)
+        tokens = []
+        token_logprobs = []
+        if "prefill" in raw_response[0]["details"]:
+            prefills = raw_response[0]["details"]['prefill']
+            for prefill in prefills:
+                tokens.append(prefill['text'])
+                token_logprobs.append(prefill['logprob'])
+        new_tokens = [token['text'] for token in raw_response[0]['details']['tokens']]
+        new_token_logprobs = [token['logprob'] for token in raw_response[0]['details']['tokens']]
         transformed_logprobs = {
-            "tokens": [token['text'] for token in raw_response[0]['details']['tokens']],
-            "token_logprobs": [token['logprob'] for token in raw_response[0]['details']['tokens']]
+            "tokens": tokens + new_tokens,
+            "token_logprobs": token_logprobs + new_token_logprobs
         }
     except Exception as e:
         print("LiteLLM non blocking exception", e)
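
For context, a minimal, self-contained sketch of what the hunk above does. The payload values are invented for illustration; only the shape follows the TGI `details.prefill` / `details.tokens` fields that the code reads:

# Hypothetical raw TGI response, as stored in response._hidden_params["original_response"].
# "prefill" is only present when decoder_input_details=True (i.e. echo was requested).
raw_response = [{
    "details": {
        "prefill": [
            {"text": "Hello", "logprob": None},   # first prompt token has no logprob
            {"text": " world", "logprob": -1.23},
        ],
        "tokens": [
            {"text": "!", "logprob": -0.45},
            {"text": " How", "logprob": -2.10},
        ],
    }
}]

tokens, token_logprobs = [], []
if "prefill" in raw_response[0]["details"]:
    for prefill in raw_response[0]["details"]["prefill"]:
        tokens.append(prefill["text"])
        token_logprobs.append(prefill["logprob"])
tokens += [t["text"] for t in raw_response[0]["details"]["tokens"]]
token_logprobs += [t["logprob"] for t in raw_response[0]["details"]["tokens"]]

# Echoed prompt tokens come first, then the generated tokens:
# tokens         -> ['Hello', ' world', '!', ' How']
# token_logprobs -> [None, -1.23, -0.45, -2.1]
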
@@ -1348,6 +1348,10 @@ def get_optional_params(  # use the openai defaults
             optional_params["best_of"] = n
         if presence_penalty:
             optional_params["repetition_penalty"] = presence_penalty
+        if "echo" in special_params:
+            # https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation.decoder_input_details
+            # Return the decoder input token logprobs and ids. You must set details=True as well for it to be taken into account. Defaults to False
+            optional_params["decoder_input_details"] = special_params["echo"]
     elif custom_llm_provider == "together_ai":
         ## check if unsupported param passed in
         supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop", "frequency_penalty"]
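
A hedged usage sketch of the new behavior: for a HuggingFace/TGI-backed model, passing echo is mapped by get_optional_params to decoder_input_details, so the prompt tokens and their logprobs are returned alongside the generated ones and merged as shown in the first hunk. The model name is a placeholder, and the exact kwargs accepted by text_completion may differ by litellm version:

import litellm

# Illustrative call only; "huggingface/bigcode/starcoder" stands in for any TGI-served model.
# echo=True is translated into decoder_input_details=True for the HuggingFace provider,
# so logprobs cover the echoed prompt plus the completion.
response = litellm.text_completion(
    model="huggingface/bigcode/starcoder",
    prompt="def hello():",
    max_tokens=10,
    logprobs=1,
    echo=True,
)
print(response)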