diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 200b0ae58..163a66af3 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -100,6 +100,25 @@ def test_completion_claude_3():
         pytest.fail(f"Error occurred: {e}")
 
 
+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions, here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
diff --git a/litellm/utils.py b/litellm/utils.py
index 173f5e79e..c315ca4a4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8017,10 +8017,21 @@ class CustomStreamWrapper:
             finish_reason = None
             if str_line.startswith("data:"):
                 data_json = json.loads(str_line[5:])
-                text = data_json.get("completion", "")
-                if data_json.get("stop_reason", None):
+                type_chunk = data_json.get("type", None)
+                if type_chunk == "content_block_delta":
+                    """
+                    Anthropic content chunk
+                    chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                    """
+                    text = data_json.get("delta", {}).get("text", "")
+                elif type_chunk == "message_delta":
+                    """
+                    Anthropic
+                    chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                    """
+                    # TODO - get usage from this chunk, set in response
+                    finish_reason = data_json.get("delta", {}).get("stop_reason", None)
                     is_finished = True
-                    finish_reason = data_json["stop_reason"]
                 return {
                     "text": text,
                     "is_finished": is_finished,