forked from phoenix/litellm-mirror
commit 60873abb41
parent bb3c2fefdb

    further fixes to together ai streaming

6 changed files with 39 additions and 21 deletions
Binary file not shown.
Binary file not shown.
@@ -98,6 +98,7 @@ def completion(
     if fallbacks != []:
         return completion_with_fallbacks(**args)
     if litellm.model_alias_map and model in litellm.model_alias_map:
+        args["model_alias_map"] = litellm.model_alias_map
         model = litellm.model_alias_map[model] # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
     if azure: # this flag is deprecated, remove once notebooks are also updated.
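The added line records the active alias map in args under "model_alias_map" at the same point where the alias itself is resolved. A minimal sketch of the alias flow from the caller's side; the alias name "gpt-fast" is invented for illustration:

import litellm

litellm.model_alias_map = {"gpt-fast": "gpt-3.5-turbo"}  # "gpt-fast" is a made-up alias
# completion(model="gpt-fast", ...) now resolves to "gpt-3.5-turbo", and the
# added line above also carries the map along in args["model_alias_map"].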
@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]


 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass


 # # test on huggingface completion call
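These tests index into each chunk as chunk["choices"][0]["delta"], relying on litellm normalizing provider streams to OpenAI-style delta objects. A small illustration of the shape the assertions above assume; the field values are invented:

# OpenAI-style streaming chunk shape assumed by the tests; values are made up.
chunk = {"choices": [{"delta": {"role": "assistant", "content": "Hello"}}]}
print(chunk["choices"][0]["delta"])  # {'role': 'assistant', 'content': 'Hello'}
complete_response = chunk["choices"][0]["delta"]["content"]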
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass

-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass

+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream= True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
+

 # # test on azure completion call
 # try:
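The starcoder test checks len(delta.keys()) > 0 before concatenating, so empty deltas contribute nothing. A slightly more forgiving version of that accumulation, not part of the commit, using dict.get so a delta that has keys but no "content" also contributes an empty string instead of raising KeyError:

chunk = {"choices": [{"delta": {"content": "def "}}]}  # example chunk; values invented
complete_response = ""
delta = chunk["choices"][0]["delta"]
complete_response += delta.get("content", "")  # missing "content" yields "" rather than KeyError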
@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
-        else:
+        else:
             self.completion_stream = completion_stream
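One plausible reading of this removal: the Together AI stream handed to CustomStreamWrapper is already an iterator (the together_ai branch in the next hunk calls next() on it directly), so the extra iter() wrapper was a no-op. A generic illustration of that Python behavior:

gen = (line for line in ["data: a", "data: b"])
assert iter(gen) is gen  # iterators are their own iterators, so iter() returns the same object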
@@ -1512,7 +1510,7 @@ class CustomStreamWrapper:
             elif self.model == "replicate":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk
-            elif (self.model == "together_ai") or ("togethercomputer"
+            elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
                                                    in self.model):
                 chunk = next(self.completion_stream)
                 text_data = self.handle_together_ai_chunk(chunk)
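The old condition compared self.model against "together_ai", but with the "together_ai/bigcode/starcoder" calling convention used in the tests the provider prefix is presumably split off before the model name is stored, so that equality could never hold; checking self.custom_llm_provider matches how the call was actually routed. For reference, a minimal sketch of what a Together AI SSE chunk handler might look like; the real handle_together_ai_chunk lives elsewhere in utils.py, and the payload field names here are assumptions:

import json

def handle_together_ai_chunk_sketch(chunk: bytes) -> str:
    # SSE lines are assumed to look roughly like: b'data: {"choices": [{"text": "..."}]}'
    decoded = chunk.decode("utf-8")
    if decoded.startswith("data:"):
        payload = decoded[len("data:"):].strip()
        if payload and payload != "[DONE]":
            return json.loads(payload)["choices"][0]["text"]
    return ""

print(handle_together_ai_chunk_sketch(b'data: {"choices": [{"text": "hi"}]}'))  # -> hi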
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490"
+version = "0.1.491"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"