forked from phoenix/litellm-mirror
create model response object
parent c22e0fe173
commit 6e540fee7f
4 changed files with 71 additions and 20 deletions
@@ -1,12 +1,20 @@
-# Completion Function - completion()
-Here's the exact json output you can expect from a litellm `completion` call:
+# Output Format - completion()
+Here's the exact json output you can expect from all litellm `completion` calls for all models

 ```python
-{'choices': [{'finish_reason': 'stop',
-              'index': 0,
-              'message': {'role': 'assistant',
-                          'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."}}],
- 'created': 1691429984.3852863,
- 'model': 'claude-instant-1',
- 'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}}
+{
+  'choices': [
+    {
+      'finish_reason': 'stop',
+      'index': 0,
+      'message': {
+        'role': 'assistant',
+        'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
+      }
+    }
+  ],
+  'created': 1691429984.3852863,
+  'model': 'claude-instant-1',
+  'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}
+}
 ```
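For readers skimming the new output format, here is a minimal consumption sketch. It assumes litellm is importable and the relevant provider key (e.g. an Anthropic key) is set in the environment; the prompt text is purely illustrative. Only dictionary-style field access is shown, since that matches the shape documented above.

```python
# Minimal sketch: call completion() and read the documented fields.
# Assumes a valid provider API key is exported in the environment.
from litellm import completion

messages = [{"role": "user", "content": "How are you doing today?"}]  # illustrative prompt
response = completion(model="claude-instant-1", messages=messages)

print(response["choices"][0]["finish_reason"])        # e.g. 'stop'
print(response["choices"][0]["message"]["content"])   # the assistant reply
print(response["model"], response["usage"]["total_tokens"])
```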
@@ -11,21 +11,10 @@ from .llms.huggingface_restapi import HuggingfaceRestAPILLM
 import tiktoken
 from concurrent.futures import ThreadPoolExecutor
 encoding = tiktoken.get_encoding("cl100k_base")
-from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args
+from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, ModelResponse, read_config_args
 from litellm.utils import get_ollama_response_stream, stream_to_string, together_ai_completion_streaming
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv()  # Loading env variables using dotenv
-new_response = {
-    "choices": [
-        {
-            "finish_reason": "stop",
-            "index": 0,
-            "message": {
-                "role": "assistant"
-            }
-        }
-    ]
-}
 # TODO add translations
 ####### COMPLETION ENDPOINTS ################
 #############################################
@@ -54,7 +43,8 @@ def completion(
     top_k=40, request_timeout=0, # unused var for old version of OpenAI API
 ):
     try:
-        global new_response
+        new_response = ModelResponse()
+        print(new_response)
         if azure: # this flag is deprecated, remove once notebooks are also updated.
             custom_llm_provider="azure"
         args = locals()
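This hunk only swaps the shared module-level `new_response` dict for a fresh `ModelResponse()` per call (plus a debug `print`); how the provider branches later fill that object is outside the hunk. Below is a rough, self-contained sketch of the intended population pattern; `build_model_response` and all of its arguments are hypothetical names for illustration, not code from this commit.

```python
import time

# Illustrative only: how a provider branch inside completion() could fill the
# per-call ModelResponse before returning it. The text and token counts here
# are placeholders, not real provider output.
from litellm.utils import ModelResponse

def build_model_response(model, completion_text, prompt_tokens, completion_tokens):
    new_response = ModelResponse()
    new_response["choices"][0]["message"]["content"] = completion_text  # nested dict, set via __getitem__
    new_response["created"] = time.time()                               # set via __setitem__
    new_response["model"] = model
    new_response["usage"] = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
    return new_response

print(build_model_response("claude-instant-1", "Hello!", 18, 23)["choices"][0]["message"]["content"])
```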
@@ -119,6 +119,14 @@ def test_completion_openai_with_more_optional_params():
         response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, frequency_penalty=-0.5, logit_bias={123: 5}, user="ishaan_dev@berri.ai")
         # Add any assertions here to check the response
         print(response)
+        response_str = response['choices'][0]['message']['content']
+        response_str_2 = response.choices[0].message.content
+        print(response['choices'][0]['message']['content'])
+        print(response.choices[0].message.content)
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+        if type(response_str_2) != str:
+            pytest.fail(f"Error occurred: {e}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
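One caveat about the assertions added above: inside the two `if type(...) != str:` branches, `e` is not yet bound (it only exists inside the outer `except` clause), so a failing type check would raise a `NameError` that then surfaces through `except Exception as e` with a misleading message. A tighter drop-in for that same block might look like the sketch below; it reuses the test's `response` variable and is not part of the commit.

```python
# Sketch of stricter assertions for the same test body (not part of this commit).
# Verifies both dict-style and attribute-style access return the same string.
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content

assert isinstance(response_str, str), f"unexpected content type: {type(response_str)}"
assert isinstance(response_str_2, str), f"unexpected content type: {type(response_str_2)}"
assert response_str == response_str_2  # both access styles should agree
```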
@@ -30,6 +30,51 @@ user_logger_fn = None
 additional_details = {}
 local_cache = {}

+######## Model Response #########################
+# All liteLLM Model responses will be in this format, Follows the OpenAI Format
+# https://docs.litellm.ai/docs/completion/output
+# {
+#   'choices': [
+#     {
+#       'finish_reason': 'stop',
+#       'index': 0,
+#       'message': {
+#         'role': 'assistant',
+#         'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
+#       }
+#     }
+#   ],
+#   'created': 1691429984.3852863,
+#   'model': 'claude-instant-1',
+#   'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}
+# }
+
+class ModelResponse:
+    def __init__(self):
+        self.choices = [
+            {
+                "finish_reason": "stop",
+                "index": 0,
+                "message": {
+                    "role": "assistant"
+                }
+            }
+        ]
+        self.created = None
+        self.model = None
+        self.usage = {
+            "prompt_tokens": None,
+            "completion_tokens": None,
+            "total_tokens": None
+        }
+
+    def __getitem__(self, key):
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        setattr(self, key, value)
+
+
 def print_verbose(print_statement):
     if litellm.set_verbose:
         print(f"LiteLLM: {print_statement}")
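Because `ModelResponse` only defines `__getitem__` and `__setitem__`, it supports interchangeable attribute-style and dict-style access to its own top-level attributes, while nested fields such as `choices[0]` remain plain dicts (so chained attribute access like `.choices[0].message` does not work on this class). A small usage sketch, assuming the class exactly as added above:

```python
# Usage sketch for the ModelResponse added in this commit.
# Assumes litellm at this commit is importable; otherwise paste the class above.
from litellm.utils import ModelResponse

resp = ModelResponse()

# Dict-style and attribute-style access hit the same underlying attributes.
resp["model"] = "claude-instant-1"                     # __setitem__ -> setattr
resp.usage["total_tokens"] = 41                        # nested fields are ordinary dicts
resp["choices"][0]["message"]["content"] = "Hello!"    # __getitem__ -> getattr, then dict keys

print(resp.model)                                      # 'claude-instant-1'
print(resp["choices"][0]["message"]["content"])        # 'Hello!'
```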