mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
fix anthropic streaming
This commit is contained in:
parent
45b984cb79
commit
0ac17646d9
4 changed files with 66 additions and 58 deletions
Binary file not shown.
Binary file not shown.
|
@ -92,12 +92,15 @@ class AnthropicLLM:
|
|||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
## COMPLETION CALL
|
||||
response = requests.post(
|
||||
self.completion_url, headers=self.headers, data=json.dumps(data)
|
||||
)
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
response = requests.post(
|
||||
self.completion_url, headers=self.headers, data=json.dumps(data), stream=optional_params["stream"]
|
||||
)
|
||||
return response.iter_lines()
|
||||
else:
|
||||
response = requests.post(
|
||||
self.completion_url, headers=self.headers, data=json.dumps(data)
|
||||
)
|
||||
## LOGGING
|
||||
self.logging_obj.post_call(
|
||||
input=prompt,
|
||||
|
|
|
@ -24,41 +24,41 @@ user_message = "Hello, how are you?"
|
|||
messages = [{"content": user_message, "role": "user"}]
|
||||
|
||||
# test on openai completion call
|
||||
try:
|
||||
response = completion(
|
||||
model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
|
||||
)
|
||||
response = ""
|
||||
start_time = time.time()
|
||||
for chunk in response:
|
||||
chunk_time = time.time()
|
||||
print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
print(chunk["choices"][0]["delta"])
|
||||
response += chunk["choices"][0]["delta"]
|
||||
if response == "":
|
||||
raise Exception("Empty response received")
|
||||
except:
|
||||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
|
||||
# )
|
||||
# complete_response = ""
|
||||
# start_time = time.time()
|
||||
# for chunk in response:
|
||||
# chunk_time = time.time()
|
||||
# print(f"time since initial request: {chunk_time - start_time:.5f}")
|
||||
# print(chunk["choices"][0]["delta"])
|
||||
# complete_response += chunk["choices"][0]["delta"]["content"]
|
||||
# if complete_response == "":
|
||||
# raise Exception("Empty response received")
|
||||
# except:
|
||||
# print(f"error occurred: {traceback.format_exc()}")
|
||||
# pass
|
||||
|
||||
|
||||
# test on azure completion call
|
||||
try:
|
||||
response = completion(
|
||||
model="azure/chatgpt-test", messages=messages, stream=True, logger_fn=logger_fn
|
||||
)
|
||||
response = ""
|
||||
start_time = time.time()
|
||||
for chunk in response:
|
||||
chunk_time = time.time()
|
||||
print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
print(chunk["choices"][0]["delta"])
|
||||
response += chunk["choices"][0]["delta"]
|
||||
if response == "":
|
||||
raise Exception("Empty response received")
|
||||
except:
|
||||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
# # test on azure completion call
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="azure/chatgpt-test", messages=messages, stream=True, logger_fn=logger_fn
|
||||
# )
|
||||
# response = ""
|
||||
# start_time = time.time()
|
||||
# for chunk in response:
|
||||
# chunk_time = time.time()
|
||||
# print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
# print(chunk["choices"][0]["delta"])
|
||||
# response += chunk["choices"][0]["delta"]
|
||||
# if response == "":
|
||||
# raise Exception("Empty response received")
|
||||
# except:
|
||||
# print(f"error occurred: {traceback.format_exc()}")
|
||||
# pass
|
||||
|
||||
|
||||
# test on anthropic completion call
|
||||
|
@ -66,15 +66,20 @@ try:
|
|||
response = completion(
|
||||
model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
|
||||
)
|
||||
response = ""
|
||||
complete_response = ""
|
||||
start_time = time.time()
|
||||
time_since_initial_request = []
|
||||
for chunk in response:
|
||||
chunk_time = time.time()
|
||||
print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
chunk_time = time.time()
|
||||
time_since_initial_request.append(chunk_time - start_time)
|
||||
print(f"time since initial request: {chunk_time - start_time:.5f}")
|
||||
print(chunk["choices"][0]["delta"])
|
||||
response += chunk["choices"][0]["delta"]
|
||||
if response == "":
|
||||
complete_response += chunk["choices"][0]["delta"]["content"]
|
||||
if complete_response == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"set(time_since_initial_request): {set(time_since_initial_request)}")
|
||||
if len(set(time_since_initial_request)) == 1:
|
||||
raise Exception("All time since initial request is the same")
|
||||
except:
|
||||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
|
@ -99,23 +104,23 @@ except:
|
|||
# pass
|
||||
|
||||
# test on together ai completion call
|
||||
try:
|
||||
start_time = time.time()
|
||||
response = completion(
|
||||
model="Replit-Code-3B", messages=messages, logger_fn=logger_fn, stream= True
|
||||
)
|
||||
complete_response = ""
|
||||
print(f"returned response object: {response}")
|
||||
for chunk in response:
|
||||
chunk_time = time.time()
|
||||
print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
print(chunk["choices"][0]["delta"])
|
||||
complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
|
||||
if complete_response == "":
|
||||
raise Exception("Empty response received")
|
||||
except:
|
||||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
# try:
|
||||
# start_time = time.time()
|
||||
# response = completion(
|
||||
# model="Replit-Code-3B", messages=messages, logger_fn=logger_fn, stream= True
|
||||
# )
|
||||
# complete_response = ""
|
||||
# print(f"returned response object: {response}")
|
||||
# for chunk in response:
|
||||
# chunk_time = time.time()
|
||||
# print(f"time since initial request: {chunk_time - start_time:.2f}")
|
||||
# print(chunk["choices"][0]["delta"])
|
||||
# complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
|
||||
# if complete_response == "":
|
||||
# raise Exception("Empty response received")
|
||||
# except:
|
||||
# print(f"error occurred: {traceback.format_exc()}")
|
||||
# pass
|
||||
|
||||
|
||||
# # test on azure completion call
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue