diff --git a/cookbook/misc/sagmaker_streaming.py b/cookbook/misc/sagmaker_streaming.py new file mode 100644 index 000000000..81d857b07 --- /dev/null +++ b/cookbook/misc/sagmaker_streaming.py @@ -0,0 +1,61 @@ +# Notes - on how to do sagemaker streaming using boto3 +import json +import boto3 + +import sys, os +import traceback +from dotenv import load_dotenv + +load_dotenv() +import os, io + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest +import litellm + +import io +import json + + +class TokenIterator: + def __init__(self, stream): + self.byte_iterator = iter(stream) + self.buffer = io.BytesIO() + self.read_pos = 0 + + def __iter__(self): + return self + + def __next__(self): + while True: + self.buffer.seek(self.read_pos) + line = self.buffer.readline() + if line and line[-1] == ord("\n"): + self.read_pos += len(line) + 1 + full_line = line[:-1].decode("utf-8") + line_data = json.loads(full_line.lstrip("data:").rstrip("/n")) + return line_data["token"]["text"] + chunk = next(self.byte_iterator) + self.buffer.seek(0, io.SEEK_END) + self.buffer.write(chunk["PayloadPart"]["Bytes"]) + + +payload = { + "inputs": "How do I build a website?", + "parameters": {"max_new_tokens": 256}, + "stream": True, +} + +import boto3 + +client = boto3.client("sagemaker-runtime", region_name="us-west-2") +response = client.invoke_endpoint_with_response_stream( + EndpointName="berri-benchmarking-Llama-2-70b-chat-hf-4", + Body=json.dumps(payload), + ContentType="application/json", +) + +# for token in TokenIterator(response["Body"]): +# print(token)