diff --git a/fastrepl-proxy b/fastrepl-proxy
index c765f07b74..f2fe83e002 160000
--- a/fastrepl-proxy
+++ b/fastrepl-proxy
@@ -1 +1 @@
-Subproject commit c765f07b74f9a8cae211584ee70bad10e1a847a9
+Subproject commit f2fe83e002a7c3ddedf4e500665644adfd31b9fc
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e31886e73f..7bed614a6c 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 1aed540c39..4f22f8da54 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 98033a66ed..d099c1c177 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -163,6 +163,7 @@ def completion(
             custom_llm_provider = model.split("/", 1)[0]
             model = model.split("/", 1)[1]
         model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider)
+        print(f"model: {model}; llm provider: {custom_llm_provider}")
         # check if user passed in any of the OpenAI optional params
         optional_params = get_optional_params(
             functions=functions,
@@ -245,7 +246,7 @@
                 **optional_params,
             )
             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -310,7 +311,7 @@
                 raise e

             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -374,7 +375,7 @@
                 **optional_params
             )
             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="text-completion-openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -446,7 +447,7 @@
             )
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(model_response, model, logging_obj=logging)
+                response = CustomStreamWrapper(model_response, model, custom_llm_provider="anthropic", logging_obj=logging)
                 return response
             response = model_response
         elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
@@ -493,7 +494,7 @@

             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(model_response, model, logging_obj=logging)
+                response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
                 return response
             response = model_response
         elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
diff --git a/litellm/utils.py b/litellm/utils.py
index e1d0853768..7e6b961fa3 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2510,11 +2510,11 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
+            completion_obj = {"content": ""}
             if self.sent_first_chunk == False:
-                model_response.choices[0].delta.role = "assistant"
+                completion_obj["role"] = "assistant"
                 self.sent_first_chunk = True
-            completion_obj = {"content": ""} # default to role being assistant
-            if self.model in litellm.anthropic_models:
+            if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_anthropic_chunk(chunk)
             elif self.model == "replicate" or self.custom_llm_provider == "replicate":
@@ -2539,10 +2539,10 @@
             elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk[0].outputs[0].text
-            elif self.model in litellm.aleph_alpha_models: #aleph alpha doesn't provide streaming
+            elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
-            elif self.model in litellm.open_ai_text_completion_models:
+            elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
             elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
@@ -2551,7 +2551,7 @@
             elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = str(chunk)
-            elif self.model in litellm.cohere_models or self.custom_llm_provider == "cohere":
+            elif self.custom_llm_provider == "cohere":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             elif self.custom_llm_provider == "bedrock":
@@ -2566,7 +2566,8 @@
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             model_response.model = self.model
-            model_response.choices[0].delta["content"] = completion_obj["content"]
+            if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
+                model_response.choices[0].delta = Delta(**completion_obj)
             return model_response
         except StopIteration:
             raise StopIteration
diff --git a/pyproject.toml b/pyproject.toml
index ca729f02e2..812377984b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.683"
+version = "0.1.684"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
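
Reviewer note, not part of the diff above: a minimal usage sketch of the streaming change, assuming the litellm.completion() API at this version. Because custom_llm_provider is now threaded into CustomStreamWrapper, chunk handling is dispatched on the provider name (e.g. the "anthropic/..." prefix parsed in completion()) rather than on model-name lists, and a chunk with empty content no longer populates delta["content"]. The model string below is illustrative only.

    import litellm

    # A provider-prefixed model name makes get_llm_provider() set
    # custom_llm_provider="anthropic", so the wrapper routes chunks
    # through handle_anthropic_chunk().
    response = litellm.completion(
        model="anthropic/claude-instant-1",
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    for chunk in response:
        # With this change, an empty-content chunk leaves the delta without
        # a "content" field, so read it defensively.
        delta = chunk.choices[0].delta
        print(getattr(delta, "content", "") or "", end="")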