diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 496cbc3c9a..da5f7bebea 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -348,6 +348,7 @@ def completion(
     model_response.usage.completion_tokens = completion_tokens
     model_response.usage.prompt_tokens = prompt_tokens
     model_response.usage.total_tokens = prompt_tokens + completion_tokens
+    model_response._hidden_params["original_response"] = completion_response
     return model_response
 
 
diff --git a/litellm/main.py b/litellm/main.py
index 4f5a754712..df2b3ce238 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1791,6 +1791,15 @@ def text_completion(*args, **kwargs):
     # if the model is text-davinci-003, return raw response from openai
     if kwargs["model"] in litellm.open_ai_text_completion_models and response._hidden_params.get("original_response", None) != None:
         return response._hidden_params.get("original_response", None)
+    transformed_logprobs = None
+    try:
+        raw_response = response._hidden_params.get("original_response", None)
+        transformed_logprobs = {
+            "tokens": [token['text'] for token in raw_response[0]['details']['tokens']],
+            "token_logprobs": [token['logprob'] for token in raw_response[0]['details']['tokens']]
+        }
+    except Exception as e:
+        print("LiteLLM non blocking exception", e)
     formatted_response_obj = {
         "id": response["id"],
         "object": "text_completion",
@@ -1800,7 +1809,7 @@
         {
             "text": response["choices"][0]["message"]["content"],
             "index": response["choices"][0]["index"],
-            "logprobs": None,
+            "logprobs": transformed_logprobs,
             "finish_reason": response["choices"][0]["finish_reason"]
         }
     ],
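
For reviewers, here is a minimal standalone sketch of the transformation the new `try` block performs: it maps a Hugging Face text-generation-inference style payload, where `details.tokens` carries per-token `text` and `logprob` fields, into the OpenAI-style `tokens`/`token_logprobs` shape. The `transform_logprobs` helper name and the sample payload are illustrative only, not part of the patch.

```python
# Sketch of the logprob transformation introduced in text_completion above.
# Assumes a HF TGI-style response; the sample data below is illustrative.

def transform_logprobs(raw_response):
    """Map HF `details.tokens` entries to an OpenAI-style logprobs dict."""
    tokens = raw_response[0]["details"]["tokens"]
    return {
        "tokens": [t["text"] for t in tokens],
        "token_logprobs": [t["logprob"] for t in tokens],
    }

# Example payload, shaped like a TGI response generated with details=True
sample = [{
    "generated_text": "Hello world",
    "details": {
        "tokens": [
            {"id": 15043, "text": "Hello", "logprob": -0.12},
            {"id": 3186, "text": " world", "logprob": -0.48},
        ]
    },
}]

print(transform_logprobs(sample))
# -> {'tokens': ['Hello', ' world'], 'token_logprobs': [-0.12, -0.48]}
```

As in the patch, callers should treat this as best-effort: non-HF providers will not have `details.tokens` in `original_response`, which is why the patched code swallows the exception and leaves `transformed_logprobs` as `None`.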