Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
fix(utils.py): fix streaming to not return usage dict
Fixes https://github.com/BerriAI/litellm/issues/3237
parent 70c98617da
commit 48c2c3d78a
24 changed files with 107 additions and 83 deletions
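Most of the provider-level hunks below apply one mechanical pattern: `usage` is no longer a declared field on `ModelResponse`, so non-streaming code paths attach it with `setattr(model_response, "usage", usage)` instead of `model_response.usage = usage`, and streaming chunks never receive the attribute at all. A minimal sketch of that pattern, assuming `ModelResponse` and `Usage` import from `litellm.utils` as they do in this commit; the helper name `attach_usage` is illustrative and not part of the codebase:

from litellm.utils import ModelResponse, Usage


def attach_usage(
    model_response: ModelResponse, prompt_tokens: int, completion_tokens: int
) -> ModelResponse:
    # usage is attached dynamically for non-streaming responses only;
    # streaming chunks skip this step, which keeps the usage dict out of
    # streamed output (the behavior reported in issue #3237).
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    setattr(model_response, "usage", usage)
    return model_response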
@@ -16,11 +16,11 @@ repos:
     name: Check if files match
     entry: python3 ci_cd/check_files_match.py
     language: system
-- repo: local
-  hooks:
-  - id: mypy
-    name: mypy
-    entry: python3 -m mypy --ignore-missing-imports
-    language: system
-    types: [python]
-    files: ^litellm/
+# - repo: local
+#   hooks:
+#   - id: mypy
+#     name: mypy
+#     entry: python3 -m mypy --ignore-missing-imports
+#     language: system
+#     types: [python]
+#     files: ^litellm/
@@ -298,7 +298,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -137,7 +137,8 @@ class AnthropicTextCompletion(BaseLLM):
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)
+
         return model_response
@@ -55,9 +55,11 @@ def completion(
         "inputs": prompt,
         "prompt": prompt,
         "parameters": optional_params,
-        "stream": True
-        if "stream" in optional_params and optional_params["stream"] == True
-        else False,
+        "stream": (
+            True
+            if "stream" in optional_params and optional_params["stream"] == True
+            else False
+        ),
     }

     ## LOGGING
@@ -71,9 +73,11 @@ def completion(
         completion_url_fragment_1 + model + completion_url_fragment_2,
         headers=headers,
         data=json.dumps(data),
-        stream=True
-        if "stream" in optional_params and optional_params["stream"] == True
-        else False,
+        stream=(
+            True
+            if "stream" in optional_params and optional_params["stream"] == True
+            else False
+        ),
     )
     if "text/event-stream" in response.headers["Content-Type"] or (
         "stream" in optional_params and optional_params["stream"] == True
@@ -102,28 +106,28 @@ def completion(
                 and "data" in completion_response["model_output"]
                 and isinstance(completion_response["model_output"]["data"], list)
             ):
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["model_output"]["data"][0]
+                model_response["choices"][0]["message"]["content"] = (
+                    completion_response["model_output"]["data"][0]
+                )
             elif isinstance(completion_response["model_output"], str):
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["model_output"]
+                model_response["choices"][0]["message"]["content"] = (
+                    completion_response["model_output"]
+                )
             elif "completion" in completion_response and isinstance(
                 completion_response["completion"], str
             ):
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["completion"]
+                model_response["choices"][0]["message"]["content"] = (
+                    completion_response["completion"]
+                )
             elif isinstance(completion_response, list) and len(completion_response) > 0:
                 if "generated_text" not in completion_response:
                     raise BasetenError(
                         message=f"Unable to parse response. Original response: {response.text}",
                         status_code=response.status_code,
                     )
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response[0]["generated_text"]
+                model_response["choices"][0]["message"]["content"] = (
+                    completion_response[0]["generated_text"]
+                )
                 ## GETTING LOGPROBS
                 if (
                     "details" in completion_response[0]
@@ -155,7 +159,8 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
+
     return model_response
@@ -1028,7 +1028,7 @@ def completion(
                 total_tokens=response_body["usage"]["input_tokens"]
                 + response_body["usage"]["output_tokens"],
             )
-            model_response.usage = _usage
+            setattr(model_response, "usage", _usage)
         else:
             outputText = response_body["completion"]
             model_response["finish_reason"] = response_body["stop_reason"]
@@ -1071,8 +1071,10 @@ def completion(
                 status_code=response_metadata.get("HTTPStatusCode", 500),
             )

-        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
-        if getattr(model_response.usage, "total_tokens", None) is None:
+        ## CALCULATING USAGE - bedrock charges on time, not tokens - have some mapping of cost here.
+        if not hasattr(model_response, "usage"):
+            setattr(model_response, "usage", Usage())
+        if getattr(model_response.usage, "total_tokens", None) is None:  # type: ignore
             prompt_tokens = response_metadata.get(
                 "x-amzn-bedrock-input-token-count", len(encoding.encode(prompt))
             )
@@ -1089,7 +1091,7 @@ def completion(
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)

     model_response["created"] = int(time.time())
     model_response["model"] = model
@@ -167,7 +167,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -237,7 +237,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -305,5 +305,5 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -311,7 +311,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -152,9 +152,9 @@ def completion(
         else:
             try:
                 if len(completion_response["answer"]) > 0:
-                    model_response["choices"][0]["message"][
-                        "content"
-                    ] = completion_response["answer"]
+                    model_response["choices"][0]["message"]["content"] = (
+                        completion_response["answer"]
+                    )
             except Exception as e:
                 raise MaritalkError(
                     message=response.text, status_code=response.status_code
@@ -174,7 +174,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -185,9 +185,9 @@ def completion(
         else:
             try:
                 if len(completion_response["generated_text"]) > 0:
-                    model_response["choices"][0]["message"][
-                        "content"
-                    ] = completion_response["generated_text"]
+                    model_response["choices"][0]["message"]["content"] = (
+                        completion_response["generated_text"]
+                    )
             except:
                 raise NLPCloudError(
                     message=json.dumps(completion_response),
@@ -205,7 +205,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -99,9 +99,9 @@ def completion(
         )
     else:
         try:
-            model_response["choices"][0]["message"][
-                "content"
-            ] = completion_response["choices"][0]["message"]["content"]
+            model_response["choices"][0]["message"]["content"] = (
+                completion_response["choices"][0]["message"]["content"]
+            )
         except:
             raise OobaboogaError(
                 message=json.dumps(completion_response),
@@ -115,7 +115,7 @@ def completion(
         completion_tokens=completion_response["usage"]["completion_tokens"],
         total_tokens=completion_response["usage"]["total_tokens"],
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -223,7 +223,7 @@ class OpenAITextCompletionConfig:
             model_response_object.choices = choice_list

             if "usage" in response_object:
-                model_response_object.usage = response_object["usage"]
+                setattr(model_response_object, "usage", response_object["usage"])

             if "id" in response_object:
                 model_response_object.id = response_object["id"]
@@ -191,7 +191,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -41,9 +41,9 @@ class PetalsConfig:
     """

     max_length: Optional[int] = None
-    max_new_tokens: Optional[
-        int
-    ] = litellm.max_tokens  # petals requires max tokens to be set
+    max_new_tokens: Optional[int] = (
+        litellm.max_tokens
+    )  # petals requires max tokens to be set
     do_sample: Optional[bool] = None
     temperature: Optional[float] = None
     top_k: Optional[int] = None
@@ -203,7 +203,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -345,7 +345,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -399,7 +399,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -617,7 +617,7 @@ async def async_completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -2,6 +2,7 @@
 Deprecated. We now do together ai calls via the openai client.
 Reference: https://docs.together.ai/docs/openai-api-compatibility
 """
+
 import os, types
 import json
 from enum import Enum
@@ -225,7 +226,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -789,7 +789,7 @@ def completion(
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)
         return model_response
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
@@ -996,7 +996,7 @@ async def async_completion(
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)
         return model_response
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
@@ -349,7 +349,7 @@ def completion(
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)
         return model_response
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
@@ -422,7 +422,7 @@ async def async_completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -104,7 +104,7 @@ def completion(
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
     )
-    model_response.usage = usage
+    setattr(model_response, "usage", usage)
     return model_response
@@ -186,7 +186,7 @@ def batch_completions(
             completion_tokens=completion_tokens,
             total_tokens=prompt_tokens + completion_tokens,
         )
-        model_response.usage = usage
+        setattr(model_response, "usage", usage)
         final_outputs.append(model_response)
     return final_outputs
@@ -407,8 +407,10 @@ def mock_completion(
     model_response["created"] = int(time.time())
     model_response["model"] = model

-    model_response.usage = Usage(
-        prompt_tokens=10, completion_tokens=20, total_tokens=30
+    setattr(
+        model_response,
+        "usage",
+        Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30),
     )

     try:
@@ -652,6 +654,7 @@ def completion(
             model
         ]  # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
+    setattr(model_response, "usage", litellm.Usage())
     if (
         kwargs.get("azure", False) == True
     ):  # don't remove flag check, to remain backwards compatible for repos like Codium
@@ -61,6 +61,7 @@ def validate_first_format(chunk):
     assert isinstance(chunk["created"], int), "'created' should be an integer."
     assert isinstance(chunk["model"], str), "'model' should be a string."
     assert isinstance(chunk["choices"], list), "'choices' should be a list."
+    assert not hasattr(chunk, "usage"), "Chunk cannot contain usage"

     for choice in chunk["choices"]:
         assert isinstance(choice["index"], int), "'index' should be an integer."
@@ -90,6 +91,7 @@ def validate_second_format(chunk):
     assert isinstance(chunk["created"], int), "'created' should be an integer."
     assert isinstance(chunk["model"], str), "'model' should be a string."
     assert isinstance(chunk["choices"], list), "'choices' should be a list."
+    assert not hasattr(chunk, "usage"), "Chunk cannot contain usage"

     for choice in chunk["choices"]:
         assert isinstance(choice["index"], int), "'index' should be an integer."
@@ -127,6 +129,7 @@ def validate_last_format(chunk):
     assert isinstance(chunk["created"], int), "'created' should be an integer."
     assert isinstance(chunk["model"], str), "'model' should be a string."
     assert isinstance(chunk["choices"], list), "'choices' should be a list."
+    assert not hasattr(chunk, "usage"), "Chunk cannot contain usage"

     for choice in chunk["choices"]:
         assert isinstance(choice["index"], int), "'index' should be an integer."
@@ -529,9 +529,6 @@ class ModelResponse(OpenAIObject):
     backend changes have been made that might impact determinism.
     """

-    usage: Optional[Usage] = None
-    """Usage statistics for the completion request."""
-
     _hidden_params: dict = {}

     def __init__(
@@ -586,20 +583,27 @@ class ModelResponse(OpenAIObject):
         else:
             created = created
         model = model
-        if usage:
+        if usage is not None:
             usage = usage
-        else:
+        elif stream is None or stream == False:
             usage = Usage()
         if hidden_params:
             self._hidden_params = hidden_params

+        init_values = {
+            "id": id,
+            "choices": choices,
+            "created": created,
+            "model": model,
+            "object": object,
+            "system_fingerprint": system_fingerprint,
+        }
+
+        if usage is not None:
+            init_values["usage"] = usage
+
         super().__init__(
-            id=id,
-            choices=choices,
-            created=created,
-            model=model,
-            object=object,
-            system_fingerprint=system_fingerprint,
-            usage=usage,
+            **init_values,
             **params,
         )
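A plausible way to see the effect of the constructor change above (hedged: this mirrors what the updated streaming-format tests assert, rather than a documented guarantee) is that a plain ModelResponse() still carries a default Usage() object, while a response object constructed for streaming never exposes usage:

from litellm.utils import ModelResponse

non_streaming = ModelResponse()  # usage defaults to an empty Usage()
streaming_chunk = ModelResponse(stream=True)  # usage is never attached

assert hasattr(non_streaming, "usage")
assert not hasattr(streaming_chunk, "usage")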
@@ -6852,10 +6856,14 @@ async def convert_to_streaming_response_async(response_object: Optional[dict] =
         model_response_object.choices = choice_list

         if "usage" in response_object and response_object["usage"] is not None:
-            model_response_object.usage = Usage(
+            setattr(
+                model_response_object,
+                "usage",
+                Usage(
                 completion_tokens=response_object["usage"].get("completion_tokens", 0),
                 prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
                 total_tokens=response_object["usage"].get("total_tokens", 0),
+                ),
             )

         if "id" in response_object:
@@ -10042,6 +10050,7 @@ class CustomStreamWrapper:
                 "content" in completion_obj
                 and isinstance(completion_obj["content"], str)
                 and len(completion_obj["content"]) == 0
+                and hasattr(model_response, "usage")
                 and hasattr(model_response.usage, "prompt_tokens")
             ):
                 if self.sent_first_chunk == False: