forked from phoenix/litellm-mirror
add streaming for together-ai

commit 3c3d144584
parent 23fc936b65

3 changed files with 36 additions and 3 deletions

@@ -375,9 +375,13 @@ def completion(
                 "model": model,
                 "prompt": prompt,
                 "request_type": "language-model-inference",
                 **optional_params
             },
             headers=headers
         )
+        if stream == True:
+            response = CustomStreamWrapper(res, "together_ai")
+            return response
+
         completion_response = res.json()['output']['choices'][0]['text']
 
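For orientation, a minimal sketch of the request the together_ai branch builds before this new return path kicks in. The endpoint URL, the requests.post call shape, and the auth header are assumptions for illustration and are not part of the diff:

    import requests

    # Hypothetical sketch of the Together AI request assembled above. With stream=True,
    # get_optional_params (see the hunk below) injects "stream_tokens": True, and the
    # raw HTTP response is handed to CustomStreamWrapper instead of being read as JSON.
    res = requests.post(
        "https://api.together.xyz/inference",            # assumed endpoint
        json={
            "model": "togethercomputer/llama-2-70b-chat",
            "prompt": "Hello, how are you?",
            "request_type": "language-model-inference",
            "stream_tokens": True,                       # arrives via **optional_params
        },
        headers={"Authorization": "Bearer <TOGETHERAI_API_KEY>"},  # assumed auth scheme
    )
    # response = CustomStreamWrapper(res, "together_ai")  # returned to the caller when stream == True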

@@ -200,10 +200,21 @@ def test_completion_replicate_stability():
 
 ######## Test TogetherAI ########
 def test_completion_together_ai():
-    model_name = "togethercomputer/mpt-30b-chat"
+    model_name = "togethercomputer/llama-2-70b-chat"
     try:
         response = completion(model=model_name, messages=messages, together_ai=True)
         # Add any assertions here to check the response
         print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+def test_completion_together_ai_stream():
+    model_name = "togethercomputer/llama-2-70b-chat"
+    try:
+        response = completion(model=model_name, messages=messages, together_ai=True, stream=True)
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk['choices'][0]['delta']) # same as openai format
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
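As the "# same as openai format" comment indicates, every item yielded by the streaming response is an OpenAI-style delta. Based on the CustomStreamWrapper.__next__ change further down, a single chunk looks roughly like this (the content string is just an example):

    chunk = {"choices": [{"delta": {"role": "assistant", "content": " Hello"}}]}
    print(chunk['choices'][0]['delta'])   # -> {'role': 'assistant', 'content': ' Hello'}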

@@ -247,7 +247,7 @@ def get_optional_params(
         return optional_params
     elif together_ai == True:
         if stream:
-            optional_params["stream"] = stream
+            optional_params["stream_tokens"] = stream
         if temperature != 1:
             optional_params["temperature"] = temperature
         if top_p != 1:
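The only behavioral change in this hunk is the key name: litellm's generic stream flag is forwarded to Together AI as stream_tokens, which appears to be the field name the Together AI inference API expects. A small illustration of the resulting params dict, using made-up input values:

    optional_params = {}
    stream, temperature, top_p = True, 0.7, 1   # example inputs

    if stream:
        optional_params["stream_tokens"] = stream   # Together AI's name for the flag
    if temperature != 1:
        optional_params["temperature"] = temperature
    if top_p != 1:
        optional_params["top_p"] = top_p

    print(optional_params)   # -> {'stream_tokens': True, 'temperature': 0.7}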

@@ -652,12 +652,25 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
+        elif model == "together_ai":
+            self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream
 
     def __iter__(self):
         return self
 
+    def handle_together_ai_chunk(self, chunk):
+        chunk = chunk.decode("utf-8")
+        text_index = chunk.find('"text":"') # this checks if text: exists
+        text_start = text_index + len('"text":"')
+        text_end = chunk.find('"}', text_start)
+        if text_index != -1 and text_end != -1:
+            extracted_text = chunk[text_start:text_end]
+            return extracted_text
+        else:
+            return ""
+
     def __next__(self):
         completion_obj ={ "role": "assistant", "content": ""}
         if self.model in litellm.anthropic_models:
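Iterating a requests.Response yields the body in raw byte chunks, which is why handle_together_ai_chunk decodes each chunk before scanning it for a "text" field. Below is a standalone copy of that parser run against a sample payload; the exact bytes Together AI emits per chunk are an assumption here, only the "text":"..." fragment matters to the parser:

    def handle_together_ai_chunk(chunk: bytes) -> str:
        # Same logic as the method above, lifted out for illustration.
        chunk = chunk.decode("utf-8")
        text_index = chunk.find('"text":"')       # does a "text" field exist?
        text_start = text_index + len('"text":"')
        text_end = chunk.find('"}', text_start)   # closing quote + brace of that field
        if text_index != -1 and text_end != -1:
            return chunk[text_start:text_end]
        return ""

    # Sample chunk (assumed shape: compact JSON, no space after the colons):
    sample = b'data: {"choices":[{"text":"Hello"}],"result_type":"language-model-inference"}'
    print(handle_together_ai_chunk(sample))            # -> Hello
    print(handle_together_ai_chunk(b'data: [DONE]'))   # -> "" (empty chunks are skipped by __next__ below)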

@@ -666,9 +679,14 @@ class CustomStreamWrapper:
         elif self.model == "replicate":
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk
+        elif self.model == "together_ai":
+            chunk = next(self.completion_stream)
+            text_data = self.handle_together_ai_chunk(chunk)
+            if text_data == "":
+                return self.__next__()
+            completion_obj["content"] = text_data
         elif self.model in litellm.cohere_models:
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk.text
         # return this for all models
         return {"choices": [{"delta": completion_obj}]}
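Putting the pieces together, a caller consumes the streamed response exactly like an OpenAI stream. This driver loop is hypothetical glue mirroring the new test above, not part of the diff:

    from litellm import completion

    messages = [{"role": "user", "content": "Hello, how are you?"}]
    response = completion(model="togethercomputer/llama-2-70b-chat",
                          messages=messages, together_ai=True, stream=True)

    full_text = ""
    for chunk in response:                        # CustomStreamWrapper is an iterator
        full_text += chunk['choices'][0]['delta']['content']
    print(full_text)                              # the complete Together AI reply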