Merge pull request #2879 from BerriAI/litellm_async_anthropic_api

[Feat] Async Anthropic API - 97.5% lower median latency
2024-04-07 09:56:52 -07:00 · 2024-04-07 09:56:52 -07:00 · a5aef6ec00
commit a5aef6ec00
parent 3b6b497672 d51e853b60
6 changed files with 339 additions and 150 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8764,7 +8764,9 @@ class CustomStreamWrapper:
        return hold, curr_chunk

    def handle_anthropic_chunk(self, chunk):
-        str_line = chunk.decode("utf-8")  # Convert bytes to string
+        str_line = chunk
+        if isinstance(chunk, bytes):  # Handle binary data
+            str_line = chunk.decode("utf-8")  # Convert bytes to string
        text = ""
        is_finished = False
        finish_reason = None
@@ -10024,6 +10026,7 @@ class CustomStreamWrapper:
                or self.custom_llm_provider == "custom_openai"
                or self.custom_llm_provider == "text-completion-openai"
                or self.custom_llm_provider == "azure_text"
+                or self.custom_llm_provider == "anthropic"
                or self.custom_llm_provider == "huggingface"
                or self.custom_llm_provider == "ollama"
                or self.custom_llm_provider == "ollama_chat"