forked from phoenix/litellm-mirror
palm streaming
parent 5000231eae
commit e7a9246414
2 changed files with 24 additions and 3 deletions
@@ -985,6 +985,11 @@ def get_optional_params( # use the openai defaults
             optional_params["frequency_penalty"] = frequency_penalty # TODO: Check if should be repetition penalty
         if stop != None:
             optional_params["stop"] = stop #TG AI expects a list, example ["\n\n\n\n","<|endoftext|>"]
+    elif custom_llm_provider == "palm":
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if top_p != 1:
+            optional_params["top_p"] = top_p
     elif (
         model in litellm.vertex_chat_models or model in litellm.vertex_code_chat_models
     ): # chat-bison has diff args from chat-bison@001, ty Google :)
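The hunk above wires the palm provider into get_optional_params: temperature and top_p are forwarded only when they differ from the OpenAI default of 1, mirroring the other provider branches. A minimal usage sketch of what this enables, assuming a "palm/..." model alias and a PALM_API_KEY environment variable select the palm provider (neither is shown in this diff):

```python
import litellm

# Non-default sampling parameters are now forwarded to PaLM by
# get_optional_params(); values left at their default of 1 are omitted.
response = litellm.completion(
    model="palm/chat-bison",  # assumed alias for the palm provider
    messages=[{"role": "user", "content": "Write a haiku about streaming."}],
    temperature=0.7,
    top_p=0.9,
)
print(response)
```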
@@ -3088,6 +3093,19 @@ class CustomStreamWrapper:
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
                 time.sleep(0.05)
+            elif self.custom_llm_provider == "palm":
+                # fake streaming
+                if len(self.completion_stream)==0:
+                    if self.sent_last_chunk:
+                        raise StopIteration
+                    else:
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
+                chunk_size = 30
+                new_chunk = self.completion_stream[:chunk_size]
+                completion_obj["content"] = new_chunk
+                self.completion_stream = self.completion_stream[chunk_size:]
+                time.sleep(0.05)
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
                 model_response = chunk
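The second hunk adds "fake streaming" for palm in CustomStreamWrapper: the provider returns the whole completion as one string, so the wrapper slices it into 30-character chunks, sleeps 50 ms between chunks, sets finish_reason = "stop" once the buffer is empty, and raises StopIteration on the call after that. A standalone sketch of the same slicing pattern, written as a plain generator rather than litellm's wrapper class, with the helper name fake_stream chosen here for illustration:

```python
import time

def fake_stream(text: str, chunk_size: int = 30, delay: float = 0.05):
    """Yield fixed-size slices of an already-complete response string."""
    sent_last_chunk = False
    while True:
        if len(text) == 0:
            if sent_last_chunk:
                return  # generator equivalent of raising StopIteration
            # emit a final (empty) chunk carrying the finish reason
            sent_last_chunk = True
            yield {"content": "", "finish_reason": "stop"}
            continue
        chunk, text = text[:chunk_size], text[chunk_size:]
        time.sleep(delay)  # simulate token-by-token arrival
        yield {"content": chunk, "finish_reason": None}

for piece in fake_stream("PaLM returns the full text at once; this simulates streaming."):
    print(piece["content"], end="", flush=True)
```

With this change, calling litellm.completion(..., stream=True) on a palm model yields incremental chunks even though the underlying API call is synchronous.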