forked from phoenix/litellm-mirror
(feat) streaming claude-3
parent 19eb9063fb
commit fdd8199a25

2 changed files with 33 additions and 3 deletions
@@ -100,6 +100,25 @@ def test_completion_claude_3():
         pytest.fail(f"Error occurred: {e}")
 
 
+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
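Outside pytest, consuming the new claude-3 stream looks roughly like the sketch below. It assumes litellm's streaming chunks expose the OpenAI-style choices[0].delta.content field, which is not shown in this diff, so treat that attribute access as an assumption rather than part of this commit.

# Rough usage sketch for the new claude-3 streaming path.
# Assumption: litellm streaming chunks follow the OpenAI-style delta shape
# (chunk.choices[0].delta.content, possibly None on the final chunk); that
# shape is not part of this diff.
import litellm
from litellm import completion

litellm.set_verbose = False

response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hello, world"}],
    max_tokens=10,
    stream=True,
)

collected = []
for chunk in response:
    delta = chunk.choices[0].delta
    if getattr(delta, "content", None):  # the final chunk may carry no text
        collected.append(delta.content)

print("".join(collected))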
@@ -8017,10 +8017,21 @@ class CustomStreamWrapper:
         finish_reason = None
         if str_line.startswith("data:"):
             data_json = json.loads(str_line[5:])
-            text = data_json.get("completion", "")
-            if data_json.get("stop_reason", None):
+            type_chunk = data_json.get("type", None)
+            if type_chunk == "content_block_delta":
+                """
+                Anthropic content chunk
+                chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                """
+                text = data_json.get("delta", {}).get("text", "")
+            elif type_chunk == "message_delta":
+                """
+                Anthropic message delta chunk
+                chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                """
+                # TODO - get usage from this chunk, set in response
+                finish_reason = data_json.get("delta", {}).get("stop_reason", None)
+                is_finished = True
-                is_finished = True
-                finish_reason = data_json["stop_reason"]
         return {
             "text": text,
             "is_finished": is_finished,
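To see how the two new branches behave on the example payloads quoted in the docstrings above, here is a self-contained sketch of the same parsing logic. It is an approximation for experimentation, not the CustomStreamWrapper method itself, and the helper name parse_anthropic_sse_line is made up for illustration.

# Standalone sketch of the new Anthropic chunk-parsing branches, fed the
# example payloads quoted in the diff's docstrings. Hypothetical helper;
# the real logic lives inside CustomStreamWrapper.
import json


def parse_anthropic_sse_line(str_line: str) -> dict:
    text = ""
    is_finished = False
    finish_reason = None
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        type_chunk = data_json.get("type", None)
        if type_chunk == "content_block_delta":
            # e.g. {'type': 'content_block_delta', 'index': 0,
            #       'delta': {'type': 'text_delta', 'text': 'Hello'}}
            text = data_json.get("delta", {}).get("text", "")
        elif type_chunk == "message_delta":
            # e.g. {'type': 'message_delta',
            #       'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None},
            #       'usage': {'output_tokens': 10}}
            finish_reason = data_json.get("delta", {}).get("stop_reason", None)
            is_finished = True
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}


print(parse_anthropic_sse_line(
    'data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}'
))
print(parse_anthropic_sse_line(
    'data: {"type": "message_delta", "delta": {"stop_reason": "max_tokens", "stop_sequence": null}, "usage": {"output_tokens": 10}}'
))

Running the sketch yields a text-only result for the content_block_delta payload and a finished result with finish_reason "max_tokens" for the message_delta payload, matching the branch logic in the diff.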