forked from phoenix/litellm-mirror
(feat) streaming claude-3
parent 19eb9063fb
commit fdd8199a25

2 changed files with 33 additions and 3 deletions
@@ -100,6 +100,25 @@ def test_completion_claude_3():
         pytest.fail(f"Error occurred: {e}")
 
 
+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
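Outside pytest, consuming the new claude-3 stream looks roughly like the sketch below. It assumes litellm's streaming chunks expose the OpenAI-style choices[0].delta.content field, which is not shown in this diff, so treat that attribute access as an assumption rather than part of this commit.

# Rough usage sketch for the new claude-3 streaming path.
# Assumption: litellm streaming chunks follow the OpenAI-style delta shape
# (chunk.choices[0].delta.content, possibly None on the final chunk); that
# shape is not part of this diff.
import litellm
from litellm import completion

litellm.set_verbose = False

response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hello, world"}],
    max_tokens=10,
    stream=True,
)

collected = []
for chunk in response:
    delta = chunk.choices[0].delta
    if getattr(delta, "content", None):  # the final chunk may carry no text
        collected.append(delta.content)

print("".join(collected))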
@@ -8017,10 +8017,21 @@ class CustomStreamWrapper:
         finish_reason = None
         if str_line.startswith("data:"):
             data_json = json.loads(str_line[5:])
-            text = data_json.get("completion", "")
-            if data_json.get("stop_reason", None):
+            type_chunk = data_json.get("type", None)
+            if type_chunk == "content_block_delta":
+                """
+                Anthropic content chunk
+                chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                """
+                text = data_json.get("delta", {}).get("text", "")
+            elif type_chunk == "message_delta":
+                """
+                Anthropic message delta chunk
+                chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                """
+                # TODO - get usage from this chunk, set in response
+                finish_reason = data_json.get("delta", {}).get("stop_reason", None)
+                is_finished = True
-                is_finished = True
-                finish_reason = data_json["stop_reason"]
         return {
             "text": text,
             "is_finished": is_finished,
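To see how the two new branches behave on the example payloads quoted in the docstrings above, here is a self-contained sketch of the same parsing logic. It is an approximation for experimentation, not the CustomStreamWrapper method itself, and the helper name parse_anthropic_sse_line is made up for illustration.

# Standalone sketch of the new Anthropic chunk-parsing branches, fed the
# example payloads quoted in the diff's docstrings. Hypothetical helper;
# the real logic lives inside CustomStreamWrapper.
import json


def parse_anthropic_sse_line(str_line: str) -> dict:
    text = ""
    is_finished = False
    finish_reason = None
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        type_chunk = data_json.get("type", None)
        if type_chunk == "content_block_delta":
            # e.g. {'type': 'content_block_delta', 'index': 0,
            #       'delta': {'type': 'text_delta', 'text': 'Hello'}}
            text = data_json.get("delta", {}).get("text", "")
        elif type_chunk == "message_delta":
            # e.g. {'type': 'message_delta',
            #       'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None},
            #       'usage': {'output_tokens': 10}}
            finish_reason = data_json.get("delta", {}).get("stop_reason", None)
            is_finished = True
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}


print(parse_anthropic_sse_line(
    'data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}'
))
print(parse_anthropic_sse_line(
    'data: {"type": "message_delta", "delta": {"stop_reason": "max_tokens", "stop_sequence": null}, "usage": {"output_tokens": 10}}'
))

Running the sketch yields a text-only result for the content_block_delta payload and a finished result with finish_reason "max_tokens" for the message_delta payload, matching the branch logic in the diff.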