diff --git a/docs/my-website/docs/load_test.md b/docs/my-website/docs/load_test.md
index f568b5696..94165fb7b 100644
--- a/docs/my-website/docs/load_test.md
+++ b/docs/my-website/docs/load_test.md
@@ -1,5 +1,84 @@
+import Image from '@theme/IdealImage';
+
# 🔥 Load Test LiteLLM
+## Load Test LiteLLM Proxy - 1500+ req/s
+
+### 1500+ concurrent requests/s
+
+The LiteLLM proxy has been load tested to handle 1500+ concurrent requests per second.
+
+```python
+import asyncio
+import time
+import uuid
+
+from openai import AsyncOpenAI
+
+# base_url - litellm proxy endpoint
+# api_key - litellm proxy api key, required if the proxy was started with auth
+litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")
+
+
+async def litellm_completion():
+    # Send one chat completion request through the proxy
+    try:
+        response = await litellm_client.chat.completions.create(
+            model="azure-gpt-3.5",
+            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
+        )
+        print(response)
+        return response
+    except Exception as e:
+        # Return the error message so main() can write it to error_log.txt
+        return f"Error during completion: {str(e)}"
+
+
+async def main():
+    for i in range(1):  # increase the range to run multiple batches
+        start = time.time()
+        n = 1500  # number of concurrent requests
+        tasks = [litellm_completion() for _ in range(n)]
+
+        chat_completions = await asyncio.gather(*tasks)
+
+        successful_completions = [
+            c for c in chat_completions if c is not None and not isinstance(c, str)
+        ]
+
+        # Write errors to error_log.txt
+        with open("error_log.txt", "a") as error_log:
+            for completion in chat_completions:
+                if isinstance(completion, str):
+                    error_log.write(completion + "\n")
+
+        print(n, time.time() - start, len(successful_completions))
+        await asyncio.sleep(10)
+
+
+if __name__ == "__main__":
+    # Blank out contents of error_log.txt
+    open("error_log.txt", "w").close()
+
+    asyncio.run(main())
+
+```
+
+### Throughput - 30% Increase
+The LiteLLM proxy + load balancer gives a **30% increase** in throughput compared to the raw OpenAI API.
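+
+The gain comes from the load balancer spreading requests across multiple deployments. The proxy's load balancing is built on litellm's `Router`; the snippet below is a minimal sketch of the same `model_list` scheme using the `Router` directly, assuming two Azure deployments of the same model (deployment names and environment variables are placeholders, not the benchmark setup).
+
+```python
+# Minimal sketch, not the exact benchmark setup: two Azure deployments share
+# one model_name, so litellm's Router load balances requests between them.
+# Deployment names and environment variables are placeholders.
+import asyncio
+import os
+
+from litellm import Router
+
+model_list = [
+    {
+        "model_name": "azure-gpt-3.5",
+        "litellm_params": {
+            "model": "azure/<your-deployment-1>",
+            "api_key": os.getenv("AZURE_API_KEY"),
+            "api_base": os.getenv("AZURE_API_BASE"),
+        },
+    },
+    {
+        "model_name": "azure-gpt-3.5",
+        "litellm_params": {
+            "model": "azure/<your-deployment-2>",
+            "api_key": os.getenv("AZURE_API_KEY_EU"),
+            "api_base": os.getenv("AZURE_API_BASE_EU"),
+        },
+    },
+]
+
+router = Router(model_list=model_list)
+
+
+async def test_router():
+    # Requests to "azure-gpt-3.5" are distributed across both deployments
+    response = await router.acompletion(
+        model="azure-gpt-3.5",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+    print(response)
+
+
+asyncio.run(test_router())
+```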
+
+
+### Latency Added - 0.00325 seconds
+The LiteLLM proxy adds **0.00325 seconds** of latency compared to using the raw OpenAI API.
+
+
+
+### Testing LiteLLM Proxy with Locust
+- 1 LiteLLM container can handle ~140 requests/second with ~0.4 failures/second (see the Locust sketch below)
+
+<Image img={require('../img/locust.png')} />
+
+
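+To reproduce the Locust numbers yourself, point a Locust user at the proxy's OpenAI-compatible `/chat/completions` route. A minimal sketch, reusing the proxy address, key, and model name from the script above (`http://0.0.0.0:4000`, `sk-1234`, `azure-gpt-3.5`):
+
+```python
+# locustfile.py - minimal sketch; host, key, and model name are the
+# placeholders used in the load test script above.
+from locust import HttpUser, task, between
+
+
+class LiteLLMProxyUser(HttpUser):
+    wait_time = between(0.5, 1)  # pause between tasks per simulated user
+
+    @task
+    def chat_completion(self):
+        # POST an OpenAI-format chat completion request to the proxy
+        self.client.post(
+            "/chat/completions",
+            json={
+                "model": "azure-gpt-3.5",
+                "messages": [{"role": "user", "content": "This is a load test"}],
+            },
+            headers={"Authorization": "Bearer sk-1234"},
+        )
+```
+
+Run it with `locust -f locustfile.py --host http://0.0.0.0:4000`, then set the user count and spawn rate in the Locust web UI.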
+
+## Load Test LiteLLM SDK vs OpenAI
Here is a script to load test LiteLLM vs OpenAI
```python
@@ -84,4 +163,5 @@ async def loadtest_fn():
# Run the event loop to execute the async function
asyncio.run(loadtest_fn())
-```
\ No newline at end of file
+```
+
diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md
index 6de8625d0..4b51f094c 100644
--- a/docs/my-website/docs/proxy/deploy.md
+++ b/docs/my-website/docs/proxy/deploy.md
@@ -350,17 +350,3 @@ Run the command `docker-compose up` or `docker compose up` as per your docker in
Your LiteLLM container should be running now on the defined port e.g. `8000`.
-
-
-
-## LiteLLM Proxy Performance
-
-LiteLLM proxy has been load tested to handle 1500 req/s.
-
-### Throughput - 30% Increase
-LiteLLM proxy + Load Balancer gives **30% increase** in throughput compared to Raw OpenAI API
-
-
-### Latency Added - 0.00325 seconds
-LiteLLM proxy adds **0.00325 seconds** latency as compared to using the Raw OpenAI API
-
diff --git a/docs/my-website/img/locust.png b/docs/my-website/img/locust.png
new file mode 100644
index 000000000..1bcedf1d0
Binary files /dev/null and b/docs/my-website/img/locust.png differ