diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py
index fcf885c9e2..e209977513 100644
--- a/litellm/llms/ai21.py
+++ b/litellm/llms/ai21.py
@@ -180,7 +180,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content"))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         model_response["usage"] = {
             "prompt_tokens": prompt_tokens,
diff --git a/litellm/llms/aleph_alpha.py b/litellm/llms/aleph_alpha.py
index 88a4bd8b53..9bceec51bd 100644
--- a/litellm/llms/aleph_alpha.py
+++ b/litellm/llms/aleph_alpha.py
@@ -263,7 +263,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"]["content"])
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 5b0f14c440..738eb9024d 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -172,7 +172,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         ) ##[TODO] use the anthropic tokenizer here
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/baseten.py b/litellm/llms/baseten.py
index 1281e4df9b..b1e904c7a1 100644
--- a/litellm/llms/baseten.py
+++ b/litellm/llms/baseten.py
@@ -134,7 +134,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"]["content"])
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index fc668ab9da..7e7ee2ffc7 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -464,7 +464,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py
index 7bdf4fc5c0..8581731b5a 100644
--- a/litellm/llms/cohere.py
+++ b/litellm/llms/cohere.py
@@ -184,7 +184,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 792a42678b..a121645635 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -224,7 +224,7 @@ class Huggingface(BaseLLM):
             else:
                 completion_tokens = 0
 
-            model_response["created"] = time.time()
+            model_response["created"] = int(time.time())
             model_response["model"] = model
             usage = Usage(
                 prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/maritalk.py b/litellm/llms/maritalk.py
index 2df99dfd3a..68a3a4e32d 100644
--- a/litellm/llms/maritalk.py
+++ b/litellm/llms/maritalk.py
@@ -143,7 +143,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/nlp_cloud.py b/litellm/llms/nlp_cloud.py
index 07b2c24998..a5f70c59b3 100644
--- a/litellm/llms/nlp_cloud.py
+++ b/litellm/llms/nlp_cloud.py
@@ -169,7 +169,7 @@ def completion(
             prompt_tokens = completion_response["nb_input_tokens"]
             completion_tokens = completion_response["nb_generated_tokens"]
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/oobabooga.py b/litellm/llms/oobabooga.py
index 9b68a6aff9..db0403c965 100644
--- a/litellm/llms/oobabooga.py
+++ b/litellm/llms/oobabooga.py
@@ -109,7 +109,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"]["content"])
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py
index f5f985ad40..010e6720c4 100644
--- a/litellm/llms/palm.py
+++ b/litellm/llms/palm.py
@@ -162,7 +162,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = "palm/" + model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index fcaa48f972..f9ce3ad0ce 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -174,7 +174,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content"))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index c020cfb45c..3bcf5f34d8 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -229,7 +229,7 @@ def completion(
         ## Step1: Start Prediction: gets a prediction url
         ## Step2: Poll prediction url for response
         ## Step2: is handled with and without streaming
-        model_response["created"] = time.time() # for pricing this must remain right before calling api
+        model_response["created"] = int(time.time()) # for pricing this must remain right before calling api
         prediction_url = start_prediction(version_id, input_data, api_key, api_base, logging_obj=logging_obj, print_verbose=print_verbose)
         print_verbose(prediction_url)
 
diff --git a/litellm/llms/sagemaker.py b/litellm/llms/sagemaker.py
index b25c83eb3e..04a08a4512 100644
--- a/litellm/llms/sagemaker.py
+++ b/litellm/llms/sagemaker.py
@@ -170,7 +170,7 @@ def completion(
             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         )
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/together_ai.py b/litellm/llms/together_ai.py
index ea993d1320..8e4970a7b6 100644
--- a/litellm/llms/together_ai.py
+++ b/litellm/llms/together_ai.py
@@ -183,7 +183,7 @@ def completion(
             )
         if "finish_reason" in completion_response["output"]["choices"][0]:
             model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index bbb23a0c2f..4ca2994c98 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -144,7 +144,7 @@ def completion(
                 "content"
             ] = str(completion_response)
         model_response["choices"][0]["message"]["content"] = str(completion_response)
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         ## CALCULATING USAGE
         prompt_tokens = len(
diff --git a/litellm/llms/vllm.py b/litellm/llms/vllm.py
index ce391d4b5f..9f4d480f65 100644
--- a/litellm/llms/vllm.py
+++ b/litellm/llms/vllm.py
@@ -90,7 +90,7 @@ def completion(
         prompt_tokens = len(outputs[0].prompt_token_ids)
         completion_tokens = len(outputs[0].outputs[0].token_ids)
 
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         usage = Usage(
             prompt_tokens=prompt_tokens,
@@ -173,7 +173,7 @@ def batch_completions(
             prompt_tokens = len(output.prompt_token_ids)
             completion_tokens = len(output.outputs[0].token_ids)
 
-            model_response["created"] = time.time()
+            model_response["created"] = int(time.time())
             model_response["model"] = model
             usage = Usage(
                 prompt_tokens=prompt_tokens,
diff --git a/litellm/main.py b/litellm/main.py
index 92a35e583b..b614e6ae67 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -228,7 +228,7 @@ def mock_completion(model: str, messages: List, stream: Optional[bool] = False,
             return response
 
         model_response["choices"][0]["message"]["content"] = mock_response
-        model_response["created"] = time.time()
+        model_response["created"] = int(time.time())
         model_response["model"] = model
         return model_response
 
@@ -388,7 +388,7 @@ def completion(
             api_base = "https://proxy.litellm.ai"
             custom_llm_provider = "openai"
             api_key = model_api_key
-        
+
         # check if user passed in any of the OpenAI optional params
         optional_params = get_optional_params(
             functions=functions,
@@ -1245,7 +1245,7 @@ def completion(
 
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = response_string
-            model_response["created"] = time.time()
+            model_response["created"] = int(time.time())
            model_response["model"] = "ollama/" + model
            prompt_tokens = len(encoding.encode(prompt)) # type: ignore
            completion_tokens = len(encoding.encode(response_string))
@@ -1371,7 +1371,7 @@ def completion(
             string_response = response_json['data'][0]['output'][0]
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = string_response
-            model_response["created"] = time.time()
+            model_response["created"] = int(time.time())
             model_response["model"] = model
             response = model_response
         else:
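
Note (not part of the patch): every hunk above makes the same change, replacing the float returned by time.time() with int(time.time()) so that model_response["created"] is an integer Unix timestamp, which is what the OpenAI-style response schema uses for the "created" field. A minimal sketch of the difference follows; the response dict and model name are hypothetical, for illustration only, not litellm's ModelResponse.

# Minimal sketch: "created" as an integer Unix timestamp rather than a float.
import time

raw = time.time()    # float, e.g. 1699999999.123456
created = int(raw)   # int, e.g. 1699999999 (truncated to whole seconds)

# Hypothetical OpenAI-style response dict, for illustration only.
response = {
    "created": created,        # integer seconds since the epoch
    "model": "example-model",  # placeholder model name
    "choices": [],
}

assert isinstance(response["created"], int)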