mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

(docs) proxy performance

This commit is contained in:
parent 0f7e6c5101
commit 32b7c236e6

4 changed files with 113 additions and 0 deletions
@@ -1077,6 +1077,18 @@ Expected output on Langfuse

<Image img={require('../img/langfuse_small.png')} />

## LiteLLM Proxy Performance

### Throughput - 30% Increase

LiteLLM proxy + Load Balancer gives a **30% increase** in throughput compared to the raw OpenAI API

<Image img={require('../img/throughput.png')} />
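
The load-balancing setup behind this comparison is not shown in this diff. As a rough illustration only (not the benchmark harness), the same pattern can be sketched with `litellm.Router`, spreading one public model name across several deployments; the deployment names, environment variables, and `api_base` values below are placeholders.

```python
# Hypothetical sketch: load-balance one model name across multiple deployments.
# Deployment names, env vars, and api_base values are placeholders, not the
# configuration used for the benchmark above.
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # name callers request
        "litellm_params": {
            "model": "azure/chatgpt-v-2",  # placeholder Azure deployment
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
            "api_version": os.environ["AZURE_API_VERSION"],
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",  # plain OpenAI deployment
            "api_key": os.environ["OPENAI_API_KEY"],
        },
    },
]

router = Router(model_list=model_list)

# Requests for "gpt-3.5-turbo" are distributed across the deployments above.
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response)
```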

### Latency Added - 0.00325 seconds

LiteLLM proxy adds **0.00325 seconds** of latency compared to using the raw OpenAI API

<Image img={require('../img/latency.png')} />
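
The latency figure above comes from comparing proxied calls against direct calls. A minimal sketch of that kind of measurement, assuming a LiteLLM proxy is already running on http://0.0.0.0:8000 (as in the load test script added in this commit), might look like this; it illustrates the comparison, not the exact script that produced the number.

```python
# Minimal sketch: time one embedding call through the proxy vs. the raw OpenAI API.
# Assumes a LiteLLM proxy is already listening on http://0.0.0.0:8000.
import os
import time

import openai

proxied = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"], base_url="http://0.0.0.0:8000"
)
direct = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])


def timed_embedding(client):
    # Return the wall-clock time of a single embedding request.
    start = time.time()
    client.embeddings.create(model="text-embedding-ada-002", input=["hello world"])
    return time.time() - start


print(f"via proxy: {timed_embedding(proxied):.4f}s")
print(f"direct:    {timed_embedding(direct):.4f}s")
```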

## Proxy CLI Arguments
BIN  docs/my-website/img/latency.png  Normal file  (binary file not shown; 66 KiB)
BIN  docs/my-website/img/throughput.png  Normal file  (binary file not shown; 71 KiB)
101  litellm/proxy/tests/load_test_embedding_proxy.py  Normal file
@@ -0,0 +1,101 @@
# Test how long it takes to make concurrent embedding requests through the LiteLLM proxy (OpenAI-compatible endpoint)

import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm

litellm.set_verbose = False

question = "embed this very long text" * 100


# Make X concurrent embedding calls through the proxy. Allow X (the number of
# concurrent calls) to be tuned; log the question, output/exception, and response time.
# Print a summary of requests made: successful calls and failed calls. For failed calls, show the exceptions.

import concurrent.futures
import random
import time


# Function to make a single embedding call via the proxy's OpenAI-compatible API
def make_openai_completion(question):
    try:
        start_time = time.time()
        import openai

        client = openai.OpenAI(
            api_key=os.environ["OPENAI_API_KEY"], base_url="http://0.0.0.0:8000"
        )
        response = client.embeddings.create(
            model="text-embedding-ada-002",
            input=[question],
        )
        print(response)
        end_time = time.time()

        # Log the request details
        # with open("request_log.txt", "a") as log_file:
        #     log_file.write(
        #         f"Question: {question[:100]}\nResponse ID:{response.id} Content:{response.choices[0].message.content[:10]}\nTime: {end_time - start_time:.2f} seconds\n\n"
        #     )

        return response
    except Exception as e:
        # Log exceptions for failed calls
        # with open("error_log.txt", "a") as error_log_file:
        #     error_log_file.write(
        #         f"\nException: {str(e)}\n\n"
        #     )
        return None


start_time = time.time()

# Number of concurrent calls (you can adjust this)
concurrent_calls = 500

# List to store the futures of concurrent calls
futures = []

# Make concurrent calls
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
    for _ in range(concurrent_calls):
        futures.append(executor.submit(make_openai_completion, question))

# Wait for all futures to complete
concurrent.futures.wait(futures)

# Summarize the results
successful_calls = 0
failed_calls = 0

for future in futures:
    if future.result() is not None:
        successful_calls += 1
    else:
        failed_calls += 1

end_time = time.time()
# Calculate the duration
duration = end_time - start_time


print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
print(f"Total Time: {duration:.2f} seconds")

# # Display the contents of the logs
# with open("request_log.txt", "r") as log_file:
#     print("\nRequest Log:\n", log_file.read())

# with open("error_log.txt", "r") as error_log_file:
#     print("\nError Log:\n", error_log_file.read())