diff --git a/litellm/integrations/openmeter.py b/litellm/integrations/openmeter.py
index 2c470d6f49..6905fd789f 100644
--- a/litellm/integrations/openmeter.py
+++ b/litellm/integrations/openmeter.py
@@ -1,12 +1,18 @@
 # What is this?
 ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268
 
-import dotenv, os, json
-import litellm
+import json
+import os
 import traceback
+import uuid
+
+import dotenv
+import httpx
+
+import litellm
+from litellm import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-import uuid
 
 
 def get_utc_datetime():
@@ -122,7 +128,11 @@ class OpenMeterLogger(CustomLogger):
             )
 
             response.raise_for_status()
-        except Exception as e:
-            if hasattr(response, "text"):
-                litellm.print_verbose(f"\nError Message: {response.text}")
+        except httpx.HTTPStatusError as e:
+            verbose_logger.error(
+                "Failed OpenMeter logging - {}".format(e.response.text)
+            )
+            raise e
+        except Exception as e:
+            verbose_logger.error("Failed OpenMeter logging - {}".format(str(e)))
             raise e
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index cf983bbd5e..36d4f6aa1c 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -638,6 +638,8 @@ class Logging:
         verbose_logger.debug(f"success callbacks: {litellm.success_callback}")
         ## BUILD COMPLETE STREAMED RESPONSE
         complete_streaming_response = None
+        if "complete_streaming_response" in self.model_call_details:
+            return  # break out of this.
         if self.stream and isinstance(result, ModelResponse):
             if (
                 result.choices[0].finish_reason is not None
@@ -1279,6 +1281,8 @@ class Logging:
         )
         ## BUILD COMPLETE STREAMED RESPONSE
         complete_streaming_response = None
+        if "async_complete_streaming_response" in self.model_call_details:
+            return  # break out of this.
         if self.stream:
             if result.choices[0].finish_reason is not None:  # if it's the last chunk
                 self.streaming_chunks.append(result)
@@ -1302,6 +1306,7 @@
                 self.streaming_chunks.append(result)
         if complete_streaming_response is not None:
             print_verbose("Async success callbacks: Got a complete streaming response")
+
             self.model_call_details["async_complete_streaming_response"] = (
                 complete_streaming_response
             )
@@ -1431,7 +1436,7 @@ class Logging:
                             end_time=end_time,
                         )
                 if isinstance(callback, CustomLogger):  # custom logger class
-                    if self.stream == True:
+                    if self.stream is True:
                         if (
                             "async_complete_streaming_response"
                             in self.model_call_details
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 33a272fa13..2e71d4c827 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,24 +1,7 @@
-general_settings:
-  store_model_in_db: true
-  database_connection_pool_limit: 20
-
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: "*"
     litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-litellm_settings:
-  drop_params: True
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-  service_callback: ["prometheus_system"]
-  _langfuse_default_tags: ["user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"]
+      model: "*"
 
-router_settings:
-  routing_strategy: "latency-based-routing"
-  routing_strategy_args: {"ttl": 86400} # Average the last 10 calls to compute avg latency per model
-  allowed_fails: 1
-  num_retries: 3
-  retry_after: 5 # seconds to wait before retrying a failed request
-  cooldown_time: 30 # seconds to cooldown a deployment after failure
+litellm_settings:
+  callbacks: ["openmeter"]
diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py
index 247a54b542..2995fdbe52 100644
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel
 
 sys.path.insert(0, os.path.abspath("../.."))
 from typing import List, Literal, Optional, Union
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import litellm
 from litellm import Cache, completion, embedding
@@ -518,6 +519,29 @@ async def test_async_chat_azure_stream():
 
 
 # asyncio.run(test_async_chat_azure_stream())
+@pytest.mark.asyncio
+async def test_async_chat_openai_stream_options():
+    try:
+        customHandler = CompletionCustomHandler()
+        litellm.callbacks = [customHandler]
+        with patch.object(
+            customHandler, "async_log_success_event", new=AsyncMock()
+        ) as mock_client:
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hi 👋 - i'm async openai"}],
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+
+            async for chunk in response:
+                continue
+
+            mock_client.assert_awaited_once()
+    except Exception as e:
+        pytest.fail(f"An exception occurred: {str(e)}")
+
+
 ## Test Bedrock + sync
 def test_chat_bedrock_stream():
     try: