diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 7048c99f8..58ce173b3 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ef3e1e402..773d47432 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index f0238a6e7..6a48c7f57 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -98,6 +98,7 @@ def completion(
     if fallbacks != []:
         return completion_with_fallbacks(**args)
     if litellm.model_alias_map and model in litellm.model_alias_map:
+        args["model_alias_map"] = litellm.model_alias_map
         model = litellm.model_alias_map[model] # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
     if azure: # this flag is deprecated, remove once notebooks are also updated.
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index b6deb2098..b6e37a7e8 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]
 
 
 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
 
 
 # # test on huggingface completion call
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
 
-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 
+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream= True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
+
 
 # # test on azure completion call
 # try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 77ce504cf..719ec6514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream
 
@@ -1512,7 +1510,7 @@ class CustomStreamWrapper:
             elif self.model == "replicate":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk
-            elif (self.model == "together_ai") or ("togethercomputer"
+            elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
                   in self.model):
                 chunk = next(self.completion_stream)
                 text_data = self.handle_together_ai_chunk(chunk)
diff --git a/pyproject.toml b/pyproject.toml
index 20675dce8..f51666c0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490"
+version = "0.1.491"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
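
Note on the litellm/main.py change above: completion() resolves aliases through litellm.model_alias_map, and this patch additionally records the map in the call args (args["model_alias_map"]) before the alias is replaced with the real model name. A minimal usage sketch, assuming the public completion() API exercised in the tests; the alias name and prompt below are illustrative, not taken from the patch:

    import litellm
    from litellm import completion

    # illustrative alias -> real model mapping
    litellm.model_alias_map = {"starcoder": "together_ai/bigcode/starcoder"}

    messages = [{"content": "write a hello world program", "role": "user"}]

    # "starcoder" is swapped for the mapped model before the request is made;
    # with this patch the alias map itself also travels along in args["model_alias_map"]
    response = completion(model="starcoder", messages=messages)
    print(response["choices"][0]["message"]["content"])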
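
Note on the litellm/utils.py change above: CustomStreamWrapper previously treated a response as a Together AI stream only when the model string was literally "together_ai" or contained "togethercomputer"; it now also keys off custom_llm_provider == "together_ai", which covers models such as bigcode/starcoder and matches the new starcoder test. A minimal consumption sketch of such a stream (prompt is illustrative):

    from litellm import completion

    messages = [{"content": "def fib(n):", "role": "user"}]

    # stream a Together AI hosted model and accumulate the generated text;
    # chunks whose delta carries no "content" key contribute nothing
    response = completion(
        model="together_ai/bigcode/starcoder", messages=messages, stream=True
    )
    text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        text += delta.get("content", "")
    print(text)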