diff --git a/.gitignore b/.gitignore
index b75a92309a..8d99ae8af8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,4 +55,7 @@ litellm/proxy/_super_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
 litellm/proxy/myenv/bin/activate
 litellm/proxy/myenv/bin/Activate.ps1
-myenv/*
\ No newline at end of file
+myenv/*
+litellm/proxy/_experimental/out/404/index.html
+litellm/proxy/_experimental/out/model_hub/index.html
+litellm/proxy/_experimental/out/onboarding/index.html
diff --git a/litellm/__init__.py b/litellm/__init__.py
index a9f2fe537a..f67a252ebc 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -815,3 +815,4 @@ from .router import Router
 from .assistants.main import *
 from .batches.main import *
 from .scheduler import *
+from .cost_calculator import response_cost_calculator
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
new file mode 100644
index 0000000000..75717378b0
--- /dev/null
+++ b/litellm/cost_calculator.py
@@ -0,0 +1,95 @@
+# What is this?
+## File for 'response_cost' calculation in Logging
+from typing import Optional, Union, Literal
+from litellm.utils import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    TranscriptionResponse,
+    TextCompletionResponse,
+    CallTypes,
+    completion_cost,
+    print_verbose,
+)
+import litellm
+
+
+def response_cost_calculator(
+    response_object: Union[
+        ModelResponse,
+        EmbeddingResponse,
+        ImageResponse,
+        TranscriptionResponse,
+        TextCompletionResponse,
+    ],
+    model: str,
+    custom_llm_provider: str,
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ],
+    optional_params: dict,
+    cache_hit: Optional[bool] = None,
+    base_model: Optional[str] = None,
+    custom_pricing: Optional[bool] = None,
+) -> Optional[float]:
+    """
+    Calculate the 'response_cost' of an LLM response, for use in Logging.
+
+    Args:
+        response_object: Response whose cost is calculated.
+        model: Passed to `completion_cost` for image responses.
+        custom_llm_provider: Provider name, forwarded to `completion_cost`.
+        call_type: Type of call that produced the response.
+        optional_params: Stored on `response_object._hidden_params`
+            under "optional_params" before the cost lookup.
+        cache_hit: If True, the cost is 0.0 and no lookup is done.
+        base_model: Passed to `completion_cost` for non-image responses.
+        custom_pricing: If True and `model` is in `litellm.model_cost`,
+            `model` is used for pricing instead of `base_model`.
+
+    Returns:
+        The cost as a float, or None when `completion_cost` raises
+        `litellm.NotFoundError`.
+    """
+    try:
+        if cache_hit is True:
+            return 0.0  # cached responses are assigned zero cost
+
+        response_object._hidden_params["optional_params"] = optional_params
+        if isinstance(response_object, ImageResponse):
+            return completion_cost(
+                completion_response=response_object,
+                model=model,
+                call_type=call_type,
+                custom_llm_provider=custom_llm_provider,
+            )
+
+        # Override defaults if custom pricing is set. The flag to test is
+        # `custom_pricing`; `custom_llm_provider` is a str and never == True.
+        if custom_pricing is True and model in litellm.model_cost:
+            base_model = model
+        # base_model defaults to None if not set on model_info
+        return completion_cost(
+            completion_response=response_object,
+            call_type=call_type,
+            model=base_model,
+            custom_llm_provider=custom_llm_provider,
+        )
+    except litellm.NotFoundError:
+        print_verbose(
+            f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map."
+        )
+        return None
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index b186d4e115..b91aaee2ae 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -84,9 +84,9 @@ class AsyncHTTPHandler:
             response.raise_for_status()
             return response
         except httpx.HTTPStatusError as e:
-            raise
+            raise e
         except Exception as e:
-            raise
+            raise e
 
     def __del__(self) -> None:
         try:
diff --git a/litellm/main.py b/litellm/main.py
index f1d47427f4..998494ddc0 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3724,7 +3724,7 @@ async def amoderation(input: str, model: str, api_key: Optional[str] = None, **k
 
 ##### Image Generation #######################
 @client
-async def aimage_generation(*args, **kwargs):
+async def aimage_generation(*args, **kwargs) -> ImageResponse:
     """
     Asynchronously calls the `image_generation` function with the given arguments and keyword arguments.
 
@@ -3757,6 +3757,8 @@ async def aimage_generation(*args, **kwargs):
         if isinstance(init_response, dict) or isinstance(
             init_response, ImageResponse
         ):  ## CACHING SCENARIO
+            if isinstance(init_response, dict):
+                init_response = ImageResponse(**init_response)
             response = init_response
         elif asyncio.iscoroutine(init_response):
             response = await init_response
@@ -3792,7 +3794,7 @@ def image_generation(
     litellm_logging_obj=None,
     custom_llm_provider=None,
     **kwargs,
-):
+) -> ImageResponse:
     """
     Maps the https://api.openai.com/v1/images/generations endpoint.
 
@@ -4533,7 +4535,7 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]
 
 def stream_chunk_builder(
     chunks: list, messages: Optional[list] = None, start_time=None, end_time=None
-):
+) -> Union[ModelResponse, TextCompletionResponse]:
     model_response = litellm.ModelResponse()
     ### SORT CHUNKS BASED ON CREATED ORDER ##
     print_verbose("Goes into checking if chunk has hiddden created at param")
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index b6a6a06adf..9abd05eaf0 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -5,6 +5,9 @@ model_list:
     model: openai/my-fake-model
     rpm: 800
   model_name: gpt-3.5-turbo-fake-model
+- model_name: llama3-70b-8192
+  litellm_params:
+    model: groq/llama3-70b-8192
 # - litellm_params:
 #     api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
 #     api_key: os.environ/AZURE_EUROPE_API_KEY
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index 1e1d271011..2b56934850 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -470,3 +470,55 @@ def test_replicate_llama3_cost_tracking():
         5,
     )
     assert cost == expected_cost
+
+
+@pytest.mark.parametrize("is_streaming", [True])  #  False
+def test_groq_response_cost_tracking(is_streaming):
+    """
+    Check that `litellm.response_cost_calculator` returns a positive float
+    cost for a groq `llama3-70b-8192` chat completion response.
+
+    NOTE: `is_streaming` is parametrized but not used by the test body yet.
+    """
+    from litellm.utils import (
+        ModelResponse,
+        Choices,
+        Message,
+        Usage,
+        CallTypes,
+        StreamingChoices,
+        Delta,
+    )
+
+    response = ModelResponse(
+        id="chatcmpl-876cce24-e520-4cf8-8649-562a9be11c02",
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    content="Hi! I'm an AI, so I don't have emotions or feelings like humans do, but I'm functioning properly and ready to help with any questions or topics you'd like to discuss! How can I assist you today?",
+                    role="assistant",
+                ),
+            )
+        ],
+        created=1717519830,
+        model="llama3-70b-8192",
+        object="chat.completion",
+        system_fingerprint="fp_c1a4bcec29",
+        usage=Usage(completion_tokens=46, prompt_tokens=17, total_tokens=63),
+    )
+    response._hidden_params["custom_llm_provider"] = "groq"
+    print(response)
+
+    response_cost = litellm.response_cost_calculator(
+        response_object=response,
+        model="groq/llama3-70b-8192",
+        custom_llm_provider="groq",
+        call_type=CallTypes.acompletion.value,
+        optional_params={},
+    )
+
+    print(f"response_cost: {response_cost}")
+    assert isinstance(response_cost, float)
+    assert response_cost > 0.0
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 92b798d84c..d7d646d1ed 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -885,6 +885,7 @@ def test_completion_mistral_api_mistral_large_function_call_with_streaming():
         idx = 0
         for chunk in response:
             print(f"chunk in response: {chunk}")
+            assert chunk._hidden_params["custom_llm_provider"] == "mistral"
             if idx == 0:
                 assert (
                     chunk.choices[0].delta.tool_calls[0].function.arguments is not None
@@ -898,7 +899,6 @@ def test_completion_mistral_api_mistral_large_function_call_with_streaming():
             elif chunk.choices[0].finish_reason is not None:  # last chunk
                 validate_final_streaming_function_calling_chunk(chunk=chunk)
             idx += 1
-        # raise Exception("it worked!")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
diff --git a/litellm/utils.py b/litellm/utils.py
index 4dcee6be45..91f2b48a14 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1499,51 +1499,23 @@ class Logging:
                 )
                 and self.stream != True
             ):  # handle streaming separately
-                try:
-                    if self.model_call_details.get("cache_hit", False) == True:
-                        self.model_call_details["response_cost"] = 0.0
-                    else:
-                        result._hidden_params["optional_params"] = self.optional_params
-                        if (
-                            self.call_type == CallTypes.aimage_generation.value
-                            or self.call_type == CallTypes.image_generation.value
-                        ):
-                            self.model_call_details["response_cost"] = (
-                                litellm.completion_cost(
-                                    completion_response=result,
-                                    model=self.model,
-                                    call_type=self.call_type,
-                                    custom_llm_provider=self.model_call_details.get(
-                                        "custom_llm_provider", None
-                                    ),  # set for img gen models
-                                )
-                            )
-                        else:
-                            base_model: Optional[str] = None
-                            # check if base_model set on azure
-                            base_model = _get_base_model_from_metadata(
-                                model_call_details=self.model_call_details
-                            )
-                            # litellm model name
-                            litellm_model = self.model_call_details["model"]
-                            if (
-                                litellm_model in litellm.model_cost
-                                and self.custom_pricing == True
-                            ):
-                                base_model = litellm_model
-                            # base_model defaults to None if not set on model_info
-                            self.model_call_details["response_cost"] = (
-                                litellm.completion_cost(
-                                    completion_response=result,
-                                    call_type=self.call_type,
-                                    model=base_model,
-                                )
-                            )
-                except litellm.NotFoundError as e:
-                    verbose_logger.debug(
-                        f"Model={self.model} not found in completion cost map."
+                self.model_call_details["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=self.model,
+                        cache_hit=self.model_call_details.get("cache_hit", False),
+                        custom_llm_provider=self.model_call_details.get(
+                            "custom_llm_provider", None
+                        ),
+                        base_model=_get_base_model_from_metadata(
+                            model_call_details=self.model_call_details
+                        ),
+                        call_type=self.call_type,
+                        optional_params=self.optional_params,
+                        # forwarded so the calculator can apply its custom-pricing model override
+                        custom_pricing=self.custom_pricing,
                     )
-                    self.model_call_details["response_cost"] = None
+                )
             else:  # streaming chunks + image gen.
                 self.model_call_details["response_cost"] = None
 
@@ -1607,29 +1577,21 @@ class Logging:
                 self.model_call_details["complete_streaming_response"] = (
                     complete_streaming_response
                 )
-                try:
-                    if self.model_call_details.get("cache_hit", False) == True:
-                        self.model_call_details["response_cost"] = 0.0
-                    else:
-                        # check if base_model set on azure
-                        base_model = _get_base_model_from_metadata(
+                self.model_call_details["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=complete_streaming_response,
+                        model=self.model,
+                        cache_hit=self.model_call_details.get("cache_hit", False),
+                        custom_llm_provider=self.model_call_details.get(
+                            "custom_llm_provider", None
+                        ),
+                        base_model=_get_base_model_from_metadata(
                             model_call_details=self.model_call_details
-                        )
-                        # base_model defaults to None if not set on model_info
-                        self.model_call_details["response_cost"] = (
-                            litellm.completion_cost(
-                                completion_response=complete_streaming_response,
-                                model=base_model,
-                            )
-                        )
-                    verbose_logger.debug(
-                        f"Model={self.model}; cost={self.model_call_details['response_cost']}"
+                        ),
+                        call_type=self.call_type,
+                        optional_params=self.optional_params,
                     )
-                except litellm.NotFoundError as e:
-                    verbose_logger.debug(
-                        f"Model={self.model} not found in completion cost map."
-                    )
-                    self.model_call_details["response_cost"] = None
+                )
             if self.dynamic_success_callbacks is not None and isinstance(
                 self.dynamic_success_callbacks, list
             ):
@@ -4576,16 +4538,20 @@ def completion_cost(
     completion="",
     total_time=0.0,  # used for replicate, sagemaker
     call_type: Literal[
-        "completion",
-        "acompletion",
         "embedding",
         "aembedding",
+        "completion",
+        "acompletion",
         "atext_completion",
         "text_completion",
         "image_generation",
         "aimage_generation",
-        "transcription",
+        "moderation",
+        "amoderation",
         "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
     ] = "completion",
     ### REGION ###
     custom_llm_provider=None,
@@ -11096,8 +11062,16 @@ class CustomStreamWrapper:
             return ""
 
     def model_response_creator(self):
+        _model = self.model
+        _received_llm_provider = self.custom_llm_provider
+        _logging_obj_llm_provider = self.logging_obj.model_call_details.get("custom_llm_provider", None)  # type: ignore
+        if (
+            _received_llm_provider == "openai"
+            and _received_llm_provider != _logging_obj_llm_provider
+        ):
+            _model = "{}/{}".format(_logging_obj_llm_provider, _model)
         model_response = ModelResponse(
-            stream=True, model=self.model, stream_options=self.stream_options
+            stream=True, model=_model, stream_options=self.stream_options
         )
         if self.response_id is not None:
             model_response.id = self.response_id
@@ -11105,10 +11079,9 @@ class CustomStreamWrapper:
             self.response_id = model_response.id
         if self.system_fingerprint is not None:
             model_response.system_fingerprint = self.system_fingerprint
-        model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
+        model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response
 
     def is_delta_empty(self, delta: Delta) -> bool: