forked from phoenix/litellm-mirror
add streaming for together-ai
parent 23fc936b65
commit 3c3d144584
3 changed files with 36 additions and 3 deletions
@@ -375,9 +375,13 @@ def completion(
             "model": model,
             "prompt": prompt,
             "request_type": "language-model-inference",
             **optional_params
         },
         headers=headers
     )
+    if stream == True:
+        response = CustomStreamWrapper(res, "together_ai")
+        return response
+
     completion_response = res.json()['output']['choices'][0]['text']
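Note: the non-streaming branch above indexes straight into res.json(). A minimal sketch of the response shape that access pattern implies; only the ['output']['choices'][0]['text'] path comes from the diff, the surrounding values are illustrative assumptions:

    # Assumed shape of a non-streaming Together AI response
    res_json = {
        "output": {
            "choices": [
                {"text": "Hello! I'm doing well, thank you."}
            ]
        }
    }
    completion_response = res_json["output"]["choices"][0]["text"]
    print(completion_response)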
@@ -200,10 +200,21 @@ def test_completion_replicate_stability():

 ######## Test TogetherAI ########
 def test_completion_together_ai():
-    model_name = "togethercomputer/mpt-30b-chat"
+    model_name = "togethercomputer/llama-2-70b-chat"
     try:
         response = completion(model=model_name, messages=messages, together_ai=True)
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+def test_completion_together_ai_stream():
+    model_name = "togethercomputer/llama-2-70b-chat"
+    try:
+        response = completion(model=model_name, messages=messages, together_ai=True, stream=True)
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk['choices'][0]['delta'])  # same as openai format
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
@@ -247,7 +247,7 @@ def get_optional_params(
         return optional_params
     elif together_ai == True:
         if stream:
-            optional_params["stream"] = stream
+            optional_params["stream_tokens"] = stream
         if temperature != 1:
             optional_params["temperature"] = temperature
         if top_p != 1:
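Note on the rename above: these optional_params are spread into the request body built in the first hunk, so the streaming flag reaches Together AI as "stream_tokens" rather than "stream". A minimal sketch of the resulting JSON body when stream=True; the field name comes from this diff, the other values are illustrative:

    prompt = "Hello, how are you?"  # illustrative; built earlier in completion()
    payload = {
        "model": "togethercomputer/llama-2-70b-chat",
        "prompt": prompt,
        "request_type": "language-model-inference",
        "stream_tokens": True,  # set by get_optional_params() when stream=True
    }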
@@ -652,12 +652,25 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
+        elif model == "together_ai":
+            self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream

     def __iter__(self):
         return self

+    def handle_together_ai_chunk(self, chunk):
+        chunk = chunk.decode("utf-8")
+        text_index = chunk.find('"text":"')  # this checks if text: exists
+        text_start = text_index + len('"text":"')
+        text_end = chunk.find('"}', text_start)
+        if text_index != -1 and text_end != -1:
+            extracted_text = chunk[text_start:text_end]
+            return extracted_text
+        else:
+            return ""
+
     def __next__(self):
         completion_obj ={ "role": "assistant", "content": ""}
         if self.model in litellm.anthropic_models:
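handle_together_ai_chunk above extracts the token text with plain string scanning rather than JSON parsing. A small illustration of that logic on a made-up chunk; the exact Together AI SSE payload format is an assumption, not shown in this diff:

    chunk = b'data: {"choices":[{"text":" Hello"}],"token":{}}'  # hypothetical SSE line
    decoded = chunk.decode("utf-8")
    start = decoded.find('"text":"') + len('"text":"')
    end = decoded.find('"}', start)
    text = decoded[start:end]  # text == " Hello"; empty string if the markers are missing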
@@ -666,9 +679,14 @@
         elif self.model == "replicate":
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk
+        elif self.model == "together_ai":
+            chunk = next(self.completion_stream)
+            text_data = self.handle_together_ai_chunk(chunk)
+            if text_data == "":
+                return self.__next__()
+            completion_obj["content"] = text_data
         elif self.model in litellm.cohere_models:
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk.text
         # return this for all models
         return {"choices": [{"delta": completion_obj}]}
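Each call to __next__ above yields an OpenAI-style chunk of the form {"choices": [{"delta": {...}}]}, so a Together AI stream can be consumed the same way as an OpenAI one (as the new test does). A small consumer sketch; the helper name is made up, only the chunk shape comes from the return statement above:

    def collect_stream_text(stream):
        # stream: any iterable of {"choices": [{"delta": {...}}]} chunks,
        # e.g. the CustomStreamWrapper returned by completion(..., stream=True)
        parts = []
        for chunk in stream:
            parts.append(chunk["choices"][0]["delta"].get("content", ""))
        return "".join(parts)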