forked from phoenix/litellm-mirror
(docs) add cookbook on sagemaker streaming
This commit is contained in:
parent
f29de0024a
commit
9652280c15
1 changed files with 61 additions and 0 deletions
61
cookbook/misc/sagmaker_streaming.py
Normal file
61
cookbook/misc/sagmaker_streaming.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# Notes on how to do SageMaker streaming using boto3
|
||||||
|
import json
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
import traceback
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
import os, io
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
import pytest
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class TokenIterator:
    """Iterate over the token texts carried by a streamed endpoint response.

    ``stream`` is an iterable of event dicts.  The raw bytes found under
    ``event["PayloadPart"]["Bytes"]`` are appended to an in-memory buffer and
    consumed one newline-terminated line at a time, so a line that is split
    across several events is only parsed once it is complete.

    Every non-blank line must be a JSON object, optionally prefixed with
    ``data:``; the value at ``["token"]["text"]`` is returned.

    Raises:
        StopIteration: the underlying stream is exhausted.
        json.JSONDecodeError: a complete line is not valid JSON.
        KeyError: a decoded line has no ``["token"]["text"]`` entry.
    """

    # Literal prefix removed from the start of each line before JSON decoding.
    _PREFIX = "data:"

    def __init__(self, stream):
        self.byte_iterator = iter(stream)
        self.buffer = io.BytesIO()
        # Offset in ``buffer`` of the first byte that has not been consumed.
        self.read_pos = 0

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            self.buffer.seek(self.read_pos)
            line = self.buffer.readline()
            if line.endswith(b"\n"):
                # Advance by exactly the bytes consumed; separator lines are
                # skipped below rather than assumed to follow every event.
                self.read_pos += len(line)
                text = line.decode("utf-8").strip()
                if not text:
                    continue  # blank separator / keep-alive line
                # Remove the literal prefix.  str.lstrip("data:") would strip
                # a *set* of characters instead of the prefix.
                if text.startswith(self._PREFIX):
                    text = text[len(self._PREFIX):].lstrip()
                return json.loads(text)["token"]["text"]
            # No complete line buffered yet: pull the next event.  When the
            # stream is exhausted, StopIteration propagates and ends iteration.
            chunk = next(self.byte_iterator)
            if "PayloadPart" not in chunk:
                # Event without payload bytes: nothing to buffer, skip it.
                continue
            self.buffer.seek(0, io.SEEK_END)
            self.buffer.write(chunk["PayloadPart"]["Bytes"])
|
||||||
|
|
||||||
|
|
||||||
|
# Example request body: a prompt, generation parameters, and "stream": True.
payload = dict(
    inputs="How do I build a website?",
    parameters=dict(max_new_tokens=256),
    stream=True,
)

import boto3

# Runtime client for the "sagemaker-runtime" service in us-west-2.
client = boto3.client("sagemaker-runtime", region_name="us-west-2")

# Send the JSON-encoded payload to the endpoint via the response-stream API.
response = client.invoke_endpoint_with_response_stream(
    ContentType="application/json",
    Body=json.dumps(payload),
    EndpointName="berri-benchmarking-Llama-2-70b-chat-hf-4",
)

# Uncomment to print the tokens as they are parsed from the response body:
# for token in TokenIterator(response["Body"]):
#     print(token)
|
Loading…
Add table
Add a link
Reference in a new issue