diff --git a/docs/.DS_Store b/docs/.DS_Store
deleted file mode 100644
index 2b1524840..000000000
Binary files a/docs/.DS_Store and /dev/null differ
diff --git a/docs/my-website/docs/budget_manager.md b/docs/my-website/docs/budget_manager.md
index a845ddd0a..f77087461 100644
--- a/docs/my-website/docs/budget_manager.md
+++ b/docs/my-website/docs/budget_manager.md
@@ -31,7 +31,7 @@ BudgetManager creates a dictionary to manage the user budgets, where the key is
 
 ### get model-breakdown per user
 
-```
+```python
 user = "1234"
 # ...
 budget_manager.get_model_cost(user=user) # {"gpt-3.5-turbo-0613": 7.3e-05}
diff --git a/docs/my-website/docs/caching/caching.md b/docs/my-website/docs/caching/caching.md
index c3ea0d7fb..996c5ec7d 100644
--- a/docs/my-website/docs/caching/caching.md
+++ b/docs/my-website/docs/caching/caching.md
@@ -89,7 +89,7 @@ response3 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1
 For responses that were returned as a cache hit, the response includes a param `cache` = True
 
 Example response with cache hit
-```
+```python
 {
     'cache': True,
     'id': 'chatcmpl-7wggdzd6OXhgE2YhcLJHJNZsEWzZ2',
diff --git a/docs/my-website/docs/completion/mock_requests.md b/docs/my-website/docs/completion/mock_requests.md
index 19dd72f11..3df84b295 100644
--- a/docs/my-website/docs/completion/mock_requests.md
+++ b/docs/my-website/docs/completion/mock_requests.md
@@ -39,7 +39,7 @@ for chunk in response:
 
 ## (Non-streaming) Mock Response Object
 
-```
+```json
 {
   "choices": [
     {
diff --git a/docs/my-website/docs/completion/model_alias.md b/docs/my-website/docs/completion/model_alias.md
index b73d3a5c2..5f910a6fe 100644
--- a/docs/my-website/docs/completion/model_alias.md
+++ b/docs/my-website/docs/completion/model_alias.md
@@ -6,7 +6,7 @@ LiteLLM simplifies this by letting you pass in a model alias mapping.
 
 # expected format
 
-```
+```python
 litellm.model_alias_map = {
     # a dictionary containing a mapping of the alias string to the actual litellm model name string
     "model_alias": "litellm_model_name"
@@ -16,7 +16,7 @@ litellm.model_alias_map = {
 # usage
 
 ### Relevant Code
-```
+```python
 model_alias_map = {
     "GPT-3.5": "gpt-3.5-turbo-16k",
     "llama2": "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
@@ -26,7 +26,7 @@ litellm.model_alias_map = model_alias_map
 ```
 
 ### Complete Code
-```
+```python
 import litellm
 from litellm import completion
diff --git a/docs/my-website/docs/completion/stream.md b/docs/my-website/docs/completion/stream.md
index 6cb813ae9..d22a85818 100644
--- a/docs/my-website/docs/completion/stream.md
+++ b/docs/my-website/docs/completion/stream.md
@@ -14,10 +14,9 @@ for chunk in response:
 ```
 
 ## Async Completion
-Asynchronous Completion with LiteLLM
-LiteLLM provides an asynchronous version of the completion function called `acompletion`
+LiteLLM provides an asynchronous version of the completion function called `acompletion`.
 
 ### Usage
-```
+```python
 from litellm import acompletion
 import asyncio
@@ -37,7 +36,7 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 
 ### Usage
 Here's an example of using it with openai. 
 But this 
-```
+```python
 from litellm import completion
 import asyncio
diff --git a/docs/my-website/docs/debugging/hosted_debugging.md b/docs/my-website/docs/debugging/hosted_debugging.md
index 89b3a5a77..7e894e7ee 100644
--- a/docs/my-website/docs/debugging/hosted_debugging.md
+++ b/docs/my-website/docs/debugging/hosted_debugging.md
@@ -32,7 +32,7 @@ Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet wi
 
 **Add it to your .env**
 
-```
+```python
 import os
 os.env["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
 
@@ -40,13 +40,13 @@ os.env["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with
 ```
 
 **Turn on LiteLLM Client**
-```
+```python
 import litellm
 litellm.client = True
 ```
 
 ### 3. Make a normal `completion()` call
-```
+```python
 import litellm
 from litellm import completion
 import os
diff --git a/docs/my-website/docs/debugging/local_debugging.md b/docs/my-website/docs/debugging/local_debugging.md
index 5a86d27ee..cd2b4e444 100644
--- a/docs/my-website/docs/debugging/local_debugging.md
+++ b/docs/my-website/docs/debugging/local_debugging.md
@@ -4,7 +4,7 @@ There's 2 ways to do local debugging - `litellm.set_verbose=True` and by passing
 
 ## Set Verbose
 This is good for getting print statements for everything litellm is doing.
-```
+```python
 from litellm import completion
 
 litellm.set_verbose=True # 👈 this is the 1-line change you need to make
@@ -31,13 +31,13 @@ In that case, LiteLLM allows you to pass in a custom logging function to see / m
 
 Your custom function
 
-```
+```python
 def my_custom_logging_fn(model_call_dict):
     print(f"model call details: {model_call_dict}")
 ```
 
 ### Complete Example
-```
+```python
 from litellm import completion
 
 def my_custom_logging_fn(model_call_dict):
diff --git a/docs/my-website/docs/extras/secret.md b/docs/my-website/docs/extras/secret.md
index 74878cbe9..59f046eff 100644
--- a/docs/my-website/docs/extras/secret.md
+++ b/docs/my-website/docs/extras/secret.md
@@ -12,7 +12,7 @@ Integrates with [Infisical's Secret Manager](https://infisical.com/) for secure
 
 ### Usage
 liteLLM manages reading in your LLM API secrets/env variables from Infisical for you
-```
+```python
 import litellm
 from infisical import InfisicalClient
diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md
index 273d22d4f..de89ba8da 100644
--- a/docs/my-website/docs/observability/helicone_integration.md
+++ b/docs/my-website/docs/observability/helicone_integration.md
@@ -8,7 +8,7 @@ In this case, we want to log requests to Helicone when a request succeeds. 
 ### Approach 1: Use Callbacks
 Use just 1 line of code to instantly log your responses **across all providers** with helicone:
 
-```
+```python
 litellm.success_callback=["helicone"]
 ```
 
@@ -39,7 +39,7 @@ If you want to use Helicone to proxy your OpenAI/Azure requests, then you can -
 - Pass in helicone request headers via: `litellm.headers`
 
 Complete Code
-```
+```python
 import litellm
 from litellm import completion
diff --git a/docs/my-website/docs/observability/llmonitor_integration.md b/docs/my-website/docs/observability/llmonitor_integration.md
index f267dfefc..544a5488a 100644
--- a/docs/my-website/docs/observability/llmonitor_integration.md
+++ b/docs/my-website/docs/observability/llmonitor_integration.md
@@ -16,10 +16,9 @@ First, sign up to get an app ID on the [LLMonitor dashboard](https://llmonitor.c
 
 Use just 2 lines of code to instantly log your responses **across all providers** with llmonitor:
 
-```
+```python
 litellm.success_callback = ["llmonitor"]
 litellm.failure_callback = ["llmonitor"]
-
 ```
 
 Complete code
diff --git a/docs/my-website/docs/observability/supabase_integration.md b/docs/my-website/docs/observability/supabase_integration.md
index d9fbc2b5a..6ca775768 100644
--- a/docs/my-website/docs/observability/supabase_integration.md
+++ b/docs/my-website/docs/observability/supabase_integration.md
@@ -35,7 +35,7 @@ create table
 
 ### Use Callbacks
 Use just 2 lines of code to instantly see costs and log your responses **across all providers** with Supabase:
-```
+```python
 litellm.success_callback=["supabase"]
 litellm.failure_callback=["supabase"]
 ```
diff --git a/docs/my-website/docs/observability/traceloop_integration.md b/docs/my-website/docs/observability/traceloop_integration.md
index 2463f40fe..ea848411e 100644
--- a/docs/my-website/docs/observability/traceloop_integration.md
+++ b/docs/my-website/docs/observability/traceloop_integration.md
@@ -13,7 +13,7 @@ While Traceloop is still in beta, [ping them](nir@traceloop.com) and mention you
 
 Then, install the Traceloop SDK:
 
-```bash
+```
 pip install traceloop
 ```
diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index 5af0307be..02dbd5441 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -2,8 +2,8 @@ LiteLLM supports Claude-1, 1.2 and Claude-2.
 
 ### API KEYS
-```
-import os 
+```python
+import os
 os.environ["ANTHROPIC_API_KEY"] = ""
 ```
diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md
index 7ba2cd701..57b8c7c9d 100644
--- a/docs/my-website/docs/providers/azure.md
+++ b/docs/my-website/docs/providers/azure.md
@@ -2,8 +2,8 @@ LiteLLM supports Azure Chat + Embedding calls.
 ### API KEYS
-```
-import os 
+```python
+import os
 os.environ["AZURE_API_KEY"] = ""
 os.environ["AZURE_API_BASE"] = ""
diff --git a/docs/my-website/docs/providers/custom.md b/docs/my-website/docs/providers/custom.md
index acc0dede9..81b92f0a0 100644
--- a/docs/my-website/docs/providers/custom.md
+++ b/docs/my-website/docs/providers/custom.md
@@ -49,7 +49,7 @@ resp = requests.post(
 ```
 
 Outputs from your custom LLM api bases should follow this format:
-```
+```python
 {
   'data': [
     {
diff --git a/docs/my-website/docs/providers/huggingface.md b/docs/my-website/docs/providers/huggingface.md
index 9dda860a5..5ae202fa5 100644
--- a/docs/my-website/docs/providers/huggingface.md
+++ b/docs/my-website/docs/providers/huggingface.md
@@ -96,7 +96,7 @@ Model name - `WizardLM/WizardCoder-Python-34B-V1.0`
 Model id - `https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud`
 
-```
+```python
 import os
 from litellm import completion
@@ -115,7 +115,7 @@ Same as the OpenAI format, but also includes logprobs.
 [See the code](https://github.com/BerriAI/litellm/blob/b4b2dbf005142e0a483d46a07a88a19814899403/litellm/llms/huggingface_restapi.py#L115)
 
-```
+```json
 {
   "choices": [
     {
diff --git a/docs/my-website/docs/providers/togetherai.md b/docs/my-website/docs/providers/togetherai.md
index 8ceb196be..2e9ca1a20 100644
--- a/docs/my-website/docs/providers/togetherai.md
+++ b/docs/my-website/docs/providers/togetherai.md
@@ -71,7 +71,7 @@ The accepted template format is: [Reference](https://huggingface.co/OpenAssistan
 ```
 
 Let's register our custom prompt template: [Implementation Code](https://github.com/BerriAI/litellm/blob/64f3d3c56ef02ac5544983efc78293de31c1c201/litellm/llms/prompt_templates/factory.py#L77)
-```
+```python
 import litellm
 
 litellm.register_prompt_template(
@@ -84,7 +84,7 @@ litellm.register_prompt_template(
 
 Let's use it! 
 
-```
+```python
 from litellm import completion
 
 # set env variable
@@ -97,7 +97,7 @@ completion(model="together_ai/OpenAssistant/llama2-70b-oasst-sft-v10", messages=
 
 **Complete Code**
 
-```
+```python
 import litellm
 from litellm import completion
@@ -119,7 +119,7 @@ print(response)
 ```
 
 **Output**
-```
+```json
 {
   "choices": [
     {
diff --git a/docs/my-website/docs/providers/vllm.md b/docs/my-website/docs/providers/vllm.md
index d97f09eae..ea0661aa0 100644
--- a/docs/my-website/docs/providers/vllm.md
+++ b/docs/my-website/docs/providers/vllm.md
@@ -43,7 +43,7 @@ print(response)
 
 ### Batch Completion
 
-```
+```python
 from litellm import batch_completion
 
 model_name = "facebook/opt-125m"
diff --git a/docs/my-website/docs/tutorials/TogetherAI_liteLLM.md b/docs/my-website/docs/tutorials/TogetherAI_liteLLM.md
index faf117e1c..45d174e76 100644
--- a/docs/my-website/docs/tutorials/TogetherAI_liteLLM.md
+++ b/docs/my-website/docs/tutorials/TogetherAI_liteLLM.md
@@ -64,7 +64,7 @@ The accepted template format is: [Reference](https://huggingface.co/OpenAssistan
 ```
 
 Let's register our custom prompt template: [Implementation Code](https://github.com/BerriAI/litellm/blob/64f3d3c56ef02ac5544983efc78293de31c1c201/litellm/llms/prompt_templates/factory.py#L77)
-```
+```python
 import litellm
 
 litellm.register_prompt_template(
@@ -77,7 +77,7 @@ litellm.register_prompt_template(
 
 Let's use it! 
 
-```
+```python
 from litellm import completion
 
 # set env variable
@@ -90,7 +90,7 @@ completion(model="together_ai/OpenAssistant/llama2-70b-oasst-sft-v10", messages=
 
 **Complete Code**
 
-```
+```python
 import litellm
 from litellm import completion
@@ -112,7 +112,7 @@ print(response)
 ```
 
 **Output**
-```
+```json
 {
   "choices": [
     {
diff --git a/docs/my-website/docs/tutorials/huggingface_tutorial.md b/docs/my-website/docs/tutorials/huggingface_tutorial.md
index 8f37bb4b9..5d569ab8d 100644
--- a/docs/my-website/docs/tutorials/huggingface_tutorial.md
+++ b/docs/my-website/docs/tutorials/huggingface_tutorial.md
@@ -16,7 +16,7 @@ In this case, let's try and call 3 models:
 
 Here's the complete example:
 
-```
+```python
 from litellm import completion
 
 model = "deepset/deberta-v3-large-squad2"
@@ -36,7 +36,7 @@ What's happening? 
 We've deployed `meta-llama/Llama-2-7b-hf` behind a public endpoint - `https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud`. Let's try it out: 
 
-```
+```python
 from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"
@@ -60,15 +60,15 @@ Either via environment variables, by setting it as a package variable or when ca
 **Setting via environment variables**
 
 Here's the 1 line of code you need to add
-```
-os.environ["HF_TOKEN] = "..."
+```python
+os.environ["HF_TOKEN"] = "..."
 ```
 
 Here's the full code:
-```
+```python
 from litellm import completion
 
-os.environ["HF_TOKEN] = "..."
+os.environ["HF_TOKEN"] = "..."
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
@@ -80,12 +80,12 @@ completion(model=model, messages=messages, custom_llm_provider="huggingface", ap
 **Setting it as package variable**
 
 Here's the 1 line of code you need to add
-```
+```python
 litellm.huggingface_key = "..."
 ```
 
 Here's the full code:
-```
+```python
 import litellm
 from litellm import completion
@@ -100,13 +100,13 @@ completion(model=model, messages=messages, custom_llm_provider="huggingface", ap
 ```
 
 **Passed in during completion call**
 
-```
+```python
 completion(..., api_key="...")
 ```
 
 Here's the full code:
-```
+```python
 from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"