Litellm Minor Fixes & Improvements (10/03/2024) (#6049)

* fix(proxy_server.py): remove spendlog fixes from proxy startup logic

Moves https://github.com/BerriAI/litellm/pull/4794 to `/db_scripts` and cleans up some caching-related debug logging, making debug logs easier to trace

* fix(langfuse_endpoints.py): Fixes https://github.com/BerriAI/litellm/issues/6041

* fix(azure.py): fix health checks for azure audio transcription models

Fixes https://github.com/BerriAI/litellm/issues/5999

* Feat: Add Literal AI Integration (#5653)

* feat: add Literal AI integration

* update readme

* Update README.md

* fix: address comments

* fix: remove literalai sdk

* fix: use HTTPHandler

* chore: add test

* fix: add asyncio lock

* fix(literal_ai.py): fix linting errors

* fix(literal_ai.py): fix linting errors

* refactor: cleanup

---------

Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Krish Dholakia 2024-10-03 18:02:28 -04:00 committed by GitHub
parent f9d0bcc5a1
commit 5c33d1c9af
14 changed files with 557 additions and 44 deletions


@@ -4,6 +4,7 @@ python script to pre-create all views required by LiteLLM Proxy Server
import asyncio
import os
from update_unassigned_teams import apply_db_fixes
# Enter your DATABASE_URL here
@@ -204,6 +205,7 @@ async def check_view_exists():
print("Last30dTopEndUsersSpend Created!") # noqa
await apply_db_fixes(db=db)
return


@@ -0,0 +1,27 @@
from prisma import Prisma
async def apply_db_fixes(db: Prisma):
try:
sql_query = """
UPDATE "LiteLLM_SpendLogs"
SET team_id = (
SELECT vt.team_id
FROM "LiteLLM_VerificationToken" vt
WHERE vt.token = "LiteLLM_SpendLogs".api_key
)
WHERE team_id IS NULL
AND EXISTS (
SELECT 1
FROM "LiteLLM_VerificationToken" vt
WHERE vt.token = "LiteLLM_SpendLogs".api_key
);
"""
response = await db.query_raw(sql_query)
print(
"Updated unassigned teams, Response=%s",
response,
)
except Exception as e:
raise Exception(f"Error apply_db_fixes: {str(e)}")
return
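
Since this backfill now lives in `/db_scripts` instead of proxy startup, it has to be run explicitly. A minimal standalone invocation sketch, assuming `DATABASE_URL` is exported and the Prisma client for the LiteLLM schema has been generated:

```python
# Standalone invocation sketch: connect with Prisma, run the SpendLogs team_id
# backfill defined above, then disconnect. Assumes DATABASE_URL is exported and
# `prisma generate` has been run against the LiteLLM schema.
import asyncio

from prisma import Prisma
from update_unassigned_teams import apply_db_fixes


async def main():
    db = Prisma()
    await db.connect()
    await apply_db_fixes(db=db)
    await db.disconnect()


asyncio.run(main())
```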


@@ -0,0 +1,119 @@
import Image from '@theme/IdealImage';
# Literal AI - Log, Evaluate, Monitor
[Literal AI](https://literalai.com) is a collaborative observability, evaluation and analytics platform for building production-grade LLM apps.
<Image img={require('../../img/literalai.png')} />
## Pre-Requisites
Ensure you have the `literalai` package installed:
```shell
pip install literalai litellm
```
## Quick Start
```python
import litellm
import os
os.environ["LITERAL_API_KEY"] = ""
os.environ['OPENAI_API_KEY']= ""
litellm.success_callback = ["literalai"] # Log Input/Output to LiteralAI
litellm.failure_callback = ["literalai"] # Log Errors to LiteralAI
# openai call
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hi 👋 - i'm openai"}
]
)
```
## Multi Step Traces
This integration is compatible with the Literal AI SDK decorators, enabling conversation and agent tracing.
```py
import litellm
from literalai import LiteralClient
import os
os.environ["LITERAL_API_KEY"] = ""
os.environ['OPENAI_API_KEY']= ""
litellm.input_callback = ["literalai"] # Support other Literal AI decorators and prompt templates
litellm.success_callback = ["literalai"] # Log Input/Output to LiteralAI
litellm.failure_callback = ["literalai"] # Log Errors to LiteralAI
literalai_client = LiteralClient()
@literalai_client.run
def my_agent(question: str):
# agent logic here
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": question}
]
)
return response
my_agent("Hello world")
# Waiting to send all logs before exiting, not needed in a production server
literalai_client.flush()
```
Learn more about [Literal AI logging capabilities](https://docs.literalai.com/guides/logs).
## Bind a Generation to its Prompt Template
This integration works out of the box with prompts managed on Literal AI. This means that a specific LLM generation will be bound to its template.
Learn more about [Prompt Management](https://docs.literalai.com/guides/prompt-management#pull-a-prompt-template-from-literal-ai) on Literal AI.
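
For illustration, a minimal sketch of pulling a managed prompt and passing its formatted messages to LiteLLM. The prompt name and variables are placeholders, and the SDK helpers used here (`api.get_prompt` / `format_messages`) should be confirmed against the Literal AI prompt-management docs linked above:

```python
import litellm
from literalai import LiteralClient

litellm.input_callback = ["literalai"]    # needed for prompt template binding
litellm.success_callback = ["literalai"]

literalai_client = LiteralClient()

# Pull a prompt template managed on Literal AI (placeholder name), format it with
# placeholder variables, and send the resulting messages through LiteLLM. The
# generation is then bound to its template on Literal AI.
prompt = literalai_client.api.get_prompt(name="my-prompt")
messages = prompt.format_messages(question="What is LiteLLM?")

response = litellm.completion(model="gpt-3.5-turbo", messages=messages)
```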
## OpenAI Proxy Usage
If you are using the LiteLLM proxy, you can use the Literal AI OpenAI instrumentation to log your calls.
```py
from literalai import LiteralClient
from openai import OpenAI
client = OpenAI(
api_key="anything", # litellm proxy virtual key
base_url="http://0.0.0.0:4000" # litellm proxy base_url
)
literalai_client = LiteralClient(api_key="")
# Instrument the OpenAI client
literalai_client.instrument_openai()
settings = {
"model": "gpt-3.5-turbo", # model you want to send litellm proxy
"temperature": 0,
# ... more settings
}
response = client.chat.completions.create(
messages=[
{
"content": "You are a helpful bot, you always reply in Spanish",
"role": "system"
},
{
"content": message.content,
"role": "user"
}
],
**settings
)
```

(New binary image file added, 298 KiB; contents not shown.)


@@ -43,6 +43,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
"lago",
"openmeter",
"logfire",
"literalai",
"dynamic_rate_limiter",
"langsmith",
"prometheus",


@@ -105,9 +105,6 @@ class InMemoryCache(BaseCache):
# This can occur when an object is referenced by another object, but the reference is never removed.
def set_cache(self, key, value, **kwargs):
print_verbose(
"InMemoryCache: set_cache. current size= {}".format(len(self.cache_dict))
)
if len(self.cache_dict) >= self.max_size_in_memory:
# only evict when cache is full
self.evict_cache()
@@ -1835,7 +1832,6 @@ class DualCache(BaseCache):
def set_cache(self, key, value, local_only: bool = False, **kwargs):
# Update both Redis and in-memory cache
try:
print_verbose(f"set cache: key: {key}; value: {value}")
if self.in_memory_cache is not None:
if "ttl" not in kwargs and self.default_in_memory_ttl is not None:
kwargs["ttl"] = self.default_in_memory_ttl
@@ -1873,7 +1869,6 @@ class DualCache(BaseCache):
def get_cache(self, key, local_only: bool = False, **kwargs):
# Try to fetch from in-memory cache first
try:
print_verbose(f"get cache: cache key: {key}; local_only: {local_only}")
result = None
if self.in_memory_cache is not None:
in_memory_result = self.in_memory_cache.get_cache(key, **kwargs)
@@ -1906,7 +1901,6 @@ class DualCache(BaseCache):
if self.in_memory_cache is not None:
in_memory_result = self.in_memory_cache.batch_get_cache(keys, **kwargs)
print_verbose(f"in_memory_result: {in_memory_result}")
if in_memory_result is not None:
result = in_memory_result


@@ -0,0 +1,318 @@
#### What this does ####
# This file contains the LiteralAILogger class which is used to log steps to the LiteralAI observability platform.
import asyncio
import os
import traceback
import uuid
from typing import Optional
import httpx
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
from litellm.llms.custom_httpx.http_handler import (
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.types.utils import StandardLoggingPayload
class LiteralAILogger(CustomBatchLogger):
def __init__(
self,
literalai_api_key=None,
literalai_api_url="https://cloud.getliteral.ai",
env=None,
**kwargs,
):
self.literalai_api_url = os.getenv("LITERAL_API_URL") or literalai_api_url
self.headers = {
"Content-Type": "application/json",
"x-api-key": literalai_api_key or os.getenv("LITERAL_API_KEY"),
"x-client-name": "litellm",
}
if env:
self.headers["x-env"] = env
self.async_httpx_client = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback
)
self.sync_http_handler = HTTPHandler()
batch_size = os.getenv("LITERAL_BATCH_SIZE", None)
self.flush_lock = asyncio.Lock()
super().__init__(
**kwargs,
flush_lock=self.flush_lock,
batch_size=int(batch_size) if batch_size else None,
)
def log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
verbose_logger.debug(
"Literal AI Layer Logging - kwargs: %s, response_obj: %s",
kwargs,
response_obj,
)
data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
self.log_queue.append(data)
verbose_logger.debug(
"Literal AI logging: queue length %s, batch size %s",
len(self.log_queue),
self.batch_size,
)
if len(self.log_queue) >= self.batch_size:
self._send_batch()
except Exception:
verbose_logger.exception(
"Literal AI Layer Error - error logging success event."
)
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
verbose_logger.info("Literal AI Failure Event Logging!")
try:
data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
self.log_queue.append(data)
verbose_logger.debug(
"Literal AI logging: queue length %s, batch size %s",
len(self.log_queue),
self.batch_size,
)
if len(self.log_queue) >= self.batch_size:
self._send_batch()
except Exception:
verbose_logger.exception(
"Literal AI Layer Error - error logging failure event."
)
def _send_batch(self):
if not self.log_queue:
return
url = f"{self.literalai_api_url}/api/graphql"
query = self._steps_query_builder(self.log_queue)
variables = self._steps_variables_builder(self.log_queue)
try:
response = self.sync_http_handler.post(
url=url,
json={
"query": query,
"variables": variables,
},
headers=self.headers,
)
response.raise_for_status()
if response.status_code >= 300:
verbose_logger.error(
f"Literal AI Error: {response.status_code} - {response.text}"
)
else:
verbose_logger.debug(
f"Batch of {len(self.log_queue)} runs successfully created"
)
except Exception:
verbose_logger.exception("Literal AI Layer Error")
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
verbose_logger.debug(
"Literal AI Async Layer Logging - kwargs: %s, response_obj: %s",
kwargs,
response_obj,
)
data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
self.log_queue.append(data)
verbose_logger.debug(
"Literal AI logging: queue length %s, batch size %s",
len(self.log_queue),
self.batch_size,
)
if len(self.log_queue) >= self.batch_size:
await self.flush_queue()
except Exception:
verbose_logger.exception(
"Literal AI Layer Error - error logging async success event."
)
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
verbose_logger.info("Literal AI Failure Event Logging!")
try:
data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
self.log_queue.append(data)
verbose_logger.debug(
"Literal AI logging: queue length %s, batch size %s",
len(self.log_queue),
self.batch_size,
)
if len(self.log_queue) >= self.batch_size:
await self.flush_queue()
except Exception:
verbose_logger.exception(
"Literal AI Layer Error - error logging async failure event."
)
async def async_send_batch(self):
if not self.log_queue:
return
url = f"{self.literalai_api_url}/api/graphql"
query = self._steps_query_builder(self.log_queue)
variables = self._steps_variables_builder(self.log_queue)
try:
response = await self.async_httpx_client.post(
url=url,
json={
"query": query,
"variables": variables,
},
headers=self.headers,
)
response.raise_for_status()
if response.status_code >= 300:
verbose_logger.error(
f"Literal AI Error: {response.status_code} - {response.text}"
)
else:
verbose_logger.debug(
f"Batch of {len(self.log_queue)} runs successfully created"
)
except httpx.HTTPStatusError as e:
verbose_logger.exception(
f"Literal AI HTTP Error: {e.response.status_code} - {e.response.text}"
)
except Exception:
verbose_logger.exception("Literal AI Layer Error")
def _prepare_log_data(self, kwargs, response_obj, start_time, end_time) -> dict:
logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
"standard_logging_object", None
)
if logging_payload is None:
raise ValueError("standard_logging_object not found in kwargs")
clean_metadata = logging_payload["metadata"]
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
settings = logging_payload["model_parameters"]
messages = logging_payload["messages"]
prompt_id = None
variables = None
if messages and isinstance(messages, list) and isinstance(messages[0], dict):
for message in messages:
if literal_prompt := getattr(message, "__literal_prompt__", None):
prompt_id = literal_prompt.get("prompt_id")
variables = literal_prompt.get("variables")
message["uuid"] = literal_prompt.get("uuid")
message["templated"] = True
tools = settings.pop("tools", None)
step = {
"id": metadata.get("step_id", str(uuid.uuid4())),
"error": logging_payload["error_str"],
"name": kwargs.get("model", ""),
"threadId": metadata.get("literalai_thread_id", None),
"parentId": metadata.get("literalai_parent_id", None),
"rootRunId": metadata.get("literalai_root_run_id", None),
"input": None,
"output": None,
"type": "llm",
"tags": metadata.get("tags", metadata.get("literalai_tags", None)),
"startTime": str(start_time),
"endTime": str(end_time),
"metadata": clean_metadata,
"generation": {
"inputTokenCount": logging_payload["prompt_tokens"],
"outputTokenCount": logging_payload["completion_tokens"],
"tokenCount": logging_payload["total_tokens"],
"promptId": prompt_id,
"variables": variables,
"provider": kwargs.get("custom_llm_provider", "litellm"),
"model": kwargs.get("model", ""),
"duration": (end_time - start_time).total_seconds(),
"settings": settings,
"messages": messages,
"tools": tools,
},
}
return step
def _steps_query_variables_builder(self, steps):
generated = ""
for id in range(len(steps)):
generated += f"""$id_{id}: String!
$threadId_{id}: String
$rootRunId_{id}: String
$type_{id}: StepType
$startTime_{id}: DateTime
$endTime_{id}: DateTime
$error_{id}: String
$input_{id}: Json
$output_{id}: Json
$metadata_{id}: Json
$parentId_{id}: String
$name_{id}: String
$tags_{id}: [String!]
$generation_{id}: GenerationPayloadInput
$scores_{id}: [ScorePayloadInput!]
$attachments_{id}: [AttachmentPayloadInput!]
"""
return generated
def _steps_ingest_steps_builder(self, steps):
generated = ""
for id in range(len(steps)):
generated += f"""
step{id}: ingestStep(
id: $id_{id}
threadId: $threadId_{id}
rootRunId: $rootRunId_{id}
startTime: $startTime_{id}
endTime: $endTime_{id}
type: $type_{id}
error: $error_{id}
input: $input_{id}
output: $output_{id}
metadata: $metadata_{id}
parentId: $parentId_{id}
name: $name_{id}
tags: $tags_{id}
generation: $generation_{id}
scores: $scores_{id}
attachments: $attachments_{id}
) {{
ok
message
}}
"""
return generated
def _steps_query_builder(self, steps):
return f"""
mutation AddStep({self._steps_query_variables_builder(steps)}) {{
{self._steps_ingest_steps_builder(steps)}
}}
"""
def _steps_variables_builder(self, steps):
def serialize_step(event, id):
result = {}
for key, value in event.items():
# Only keep the keys that are not None to avoid overriding existing values
if value is not None:
result[f"{key}_{id}"] = value
return result
variables = {}
for i in range(len(steps)):
step = steps[i]
variables.update(serialize_step(step, i))
return variables
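
For context, a minimal usage sketch of the logger above: enable the `"literalai"` callbacks, optionally tune the batch size through `LITERAL_BATCH_SIZE`, and pass thread/parent identifiers via `metadata` so they land in the `threadId`/`parentId` fields built in `_prepare_log_data`. The IDs below are placeholders.

```python
# Usage sketch. LITERAL_API_KEY must be set; LITERAL_BATCH_SIZE (optional) controls
# how many queued steps trigger a flush. The metadata keys mirror what
# _prepare_log_data reads: literalai_thread_id, literalai_parent_id, tags.
import os

import litellm

os.environ["LITERAL_API_KEY"] = ""       # your Literal AI API key
os.environ["LITERAL_BATCH_SIZE"] = "10"  # optional: flush after 10 queued steps

litellm.success_callback = ["literalai"]
litellm.failure_callback = ["literalai"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    metadata={
        "literalai_thread_id": "thread-123",  # placeholder IDs for illustration
        "literalai_parent_id": "step-456",
        "tags": ["litellm-example"],
    },
)
```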


@@ -72,6 +72,7 @@ from ..integrations.lago import LagoLogger
from ..integrations.langfuse import LangFuseLogger
from ..integrations.langsmith import LangsmithLogger
from ..integrations.litedebugger import LiteDebugger
from ..integrations.literal_ai import LiteralAILogger
from ..integrations.logfire_logger import LogfireLevel, LogfireLogger
from ..integrations.lunary import LunaryLogger
from ..integrations.openmeter import OpenMeterLogger
@@ -2245,6 +2246,14 @@ def _init_custom_logger_compatible_class(
_langsmith_logger = LangsmithLogger()
_in_memory_loggers.append(_langsmith_logger)
return _langsmith_logger # type: ignore
elif logging_integration == "literalai":
for callback in _in_memory_loggers:
if isinstance(callback, LiteralAILogger):
return callback # type: ignore
_literalai_logger = LiteralAILogger()
_in_memory_loggers.append(_literalai_logger)
return _literalai_logger # type: ignore
elif logging_integration == "prometheus":
for callback in _in_memory_loggers:
if isinstance(callback, PrometheusLogger):
@@ -2394,6 +2403,10 @@ def get_custom_logger_compatible_class(
for callback in _in_memory_loggers:
if isinstance(callback, LangsmithLogger):
return callback
elif logging_integration == "literalai":
for callback in _in_memory_loggers:
if isinstance(callback, LiteralAILogger):
return callback
elif logging_integration == "prometheus":
for callback in _in_memory_loggers:
if isinstance(callback, PrometheusLogger):


@@ -1813,7 +1813,9 @@ class AzureChatCompletion(BaseLLM):
elif mode == "audio_transcription":
# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(pwd, "../tests/gettysburg.wav")
file_path = os.path.join(
pwd, "../../../tests/gettysburg.wav"
) # proxy address
audio_file = open(file_path, "rb")
completion = await client.audio.transcriptions.with_raw_response.create(
file=audio_file,
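
A quick way to sanity-check the health-check fix above (a sketch, not part of this commit): configure an Azure model with `mode: audio_transcription` on the proxy and hit the proxy's `/health` route. The port and master key below are placeholders.

```python
# Health-check sketch: assumes a LiteLLM proxy running locally on port 4000 with a
# placeholder master key. /health reports healthy/unhealthy endpoints, including
# models configured with mode "audio_transcription".
import httpx

resp = httpx.get(
    "http://0.0.0.0:4000/health",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
)
print(resp.json())
```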


@@ -9,5 +9,4 @@ model_list:
litellm_params:
model: openai/gpt-4o-realtime-preview-2024-10-01
api_key: os.environ/OPENAI_API_KEY
api_base: http://localhost:8080
api_base: http://localhost:8080


@@ -2902,13 +2902,6 @@ async def startup_event():
)
)
### CHECK IF VIEW EXISTS ###
if prisma_client is not None:
await prisma_client.check_view_exists()
# Apply misc fixes on DB
# [non-blocking] helper to apply fixes from older litellm versions
asyncio.create_task(prisma_client.apply_db_fixes())
### START BATCH WRITING DB + CHECKING NEW MODELS###
if prisma_client is not None:
scheduler = AsyncIOScheduler()


@@ -2360,34 +2360,6 @@ class PrismaClient:
)
raise e
async def apply_db_fixes(self):
try:
verbose_proxy_logger.debug(
"Applying LiteLLM - DB Fixes fixing logs in SpendLogs"
)
sql_query = """
UPDATE "LiteLLM_SpendLogs"
SET team_id = (
SELECT vt.team_id
FROM "LiteLLM_VerificationToken" vt
WHERE vt.token = "LiteLLM_SpendLogs".api_key
)
WHERE team_id IS NULL
AND EXISTS (
SELECT 1
FROM "LiteLLM_VerificationToken" vt
WHERE vt.token = "LiteLLM_SpendLogs".api_key
);
"""
response = await self.db.query_raw(sql_query)
verbose_proxy_logger.debug(
"Applied LiteLLM - DB Fixes fixing logs in SpendLogs, Response=%s",
response,
)
except Exception as e:
verbose_proxy_logger.debug(f"Error apply_db_fixes: {str(e)}")
return
### CUSTOM FILE ###
def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:


@@ -140,6 +140,7 @@ async def langfuse_proxy_route(
request,
fastapi_response,
user_api_key_dict,
query_params=dict(request.query_params), # type: ignore
)
return received_value


@@ -0,0 +1,72 @@
import os
import sys
sys.path.insert(0, os.path.abspath("../.."))
import asyncio
import logging
import pytest
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.literal_ai import LiteralAILogger
verbose_logger.setLevel(logging.DEBUG)
litellm.set_verbose = True
@pytest.mark.asyncio
async def test_literalai_queue_logging():
try:
# Initialize LiteralAILogger
test_literalai_logger = LiteralAILogger()
litellm.callbacks = [test_literalai_logger]
test_literalai_logger.batch_size = 6
litellm.set_verbose = True
# Make multiple calls to ensure we don't hit the batch size
for _ in range(5):
response = await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Test message"}],
max_tokens=10,
temperature=0.2,
mock_response="This is a mock response",
)
await asyncio.sleep(3)
# Check that logs are in the queue
assert len(test_literalai_logger.log_queue) == 5
# Now make calls to exceed the batch size
for _ in range(3):
await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Test message"}],
max_tokens=10,
temperature=0.2,
mock_response="This is a mock response",
)
# Wait a short time for any asynchronous operations to complete
await asyncio.sleep(1)
print(
"Length of literalai log queue: {}".format(
len(test_literalai_logger.log_queue)
)
)
# Check that the queue was flushed after exceeding batch size
assert len(test_literalai_logger.log_queue) < 5
# Clean up
for cb in litellm.callbacks:
if isinstance(cb, LiteralAILogger):
await cb.async_httpx_client.client.aclose()
except Exception as e:
pytest.fail(f"Error occurred: {e}")