mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
add replicate streaming
This commit is contained in:
parent
c45b132675
commit
1c61b7b229
4 changed files with 61 additions and 36 deletions
|
@ -104,14 +104,39 @@ def completion(
|
|||
"max_new_tokens": 50,
|
||||
}
|
||||
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
api_key="",
|
||||
additional_args={"complete_input_dict": input_data},
|
||||
)
|
||||
## COMPLETION CALL
|
||||
## Replicate Completion calls have 2 steps
|
||||
## Step1: Start Prediction: gets a prediction url
|
||||
## Step2: Poll prediction url for response
|
||||
## Step2 is handled both with and without streaming
|
||||
prediction_url = start_prediction(version_id, input_data, api_key)
|
||||
print_verbose(prediction_url)
|
||||
|
||||
# Handle the prediction response (streaming or non-streaming)
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
print_verbose("streaming request")
|
||||
return handle_prediction_response_streaming(prediction_url, api_key, print_verbose)
|
||||
else:
|
||||
result = handle_prediction_response(prediction_url, api_key, print_verbose)
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key="",
|
||||
original_response=result,
|
||||
additional_args={"complete_input_dict": input_data},
|
||||
)
|
||||
|
||||
print_verbose(f"raw model_response: {result}")
|
||||
|
||||
## Building RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = result
|
||||
|
||||
# Calculate usage
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue