organize docs

commit 6faeac4cf6 (parent 4052b0d4c4)

3 changed files with 72 additions and 67 deletions
**`docs/my-website/docs/proxy/custom_pricing.md`**

````diff
@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
 
-# Custom Pricing - Sagemaker, etc.
+# Custom LLM Pricing - Sagemaker, Azure, etc
 
 Use this to register custom pricing for models.
 
@@ -16,39 +16,9 @@ LiteLLM already has pricing for any model in our [model cost map](https://github
 
 :::
 
-## Quick Start
+## Cost Per Second (e.g. Sagemaker)
 
-Register custom pricing for sagemaker completion model.
+### Usage with LiteLLM Proxy Server
 
-For cost per second pricing, you **just** need to register `input_cost_per_second`.
-
-```python
-# !pip install boto3
-from litellm import completion, completion_cost
-
-os.environ["AWS_ACCESS_KEY_ID"] = ""
-os.environ["AWS_SECRET_ACCESS_KEY"] = ""
-os.environ["AWS_REGION_NAME"] = ""
-
-
-def test_completion_sagemaker():
-    try:
-        print("testing sagemaker")
-        response = completion(
-            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
-            messages=[{"role": "user", "content": "Hey, how's it going?"}],
-            input_cost_per_second=0.000420,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        cost = completion_cost(completion_response=response)
-        print(cost)
-    except Exception as e:
-        raise Exception(f"Error occurred: {e}")
-
-```
-
-### Usage with OpenAI Proxy Server
-
 **Step 1: Add pricing to config.yaml**
 ```yaml
@@ -75,38 +45,7 @@ litellm /path/to/config.yaml
 
 ## Cost Per Token (e.g. Azure)
 
+### Usage with LiteLLM Proxy Server
 
-```python
-# !pip install boto3
-from litellm import completion, completion_cost
-
-## set ENV variables
-os.environ["AZURE_API_KEY"] = ""
-os.environ["AZURE_API_BASE"] = ""
-os.environ["AZURE_API_VERSION"] = ""
-
-
-def test_completion_azure_model():
-    try:
-        print("testing azure custom pricing")
-        # azure call
-        response = completion(
-            model = "azure/<your_deployment_name>",
-            messages = [{ "content": "Hello, how are you?","role": "user"}]
-            input_cost_per_token=0.005,
-            output_cost_per_token=1,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        cost = completion_cost(completion_response=response)
-        print(cost)
-    except Exception as e:
-        raise Exception(f"Error occurred: {e}")
-
-test_completion_azure_model()
-```
-
-### Usage with OpenAI Proxy Server
-
 ```yaml
 model_list:
````
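The proxy hunks above truncate right at the start of each `config.yaml` example, so the registered pricing itself is not visible in this excerpt. For orientation only, here is a minimal sketch of what such entries typically look like, assuming the proxy's standard `model_list` / `litellm_params` layout; the `model_name` values and the `os.environ/...` references are illustrative placeholders, not taken from this commit, and the prices simply reuse the numbers from the SDK examples.

```yaml
model_list:
  # Hypothetical entry: cost-per-second pricing for a SageMaker deployment
  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
      input_cost_per_second: 0.000420   # same illustrative rate as the SDK example

  # Hypothetical entry: cost-per-token pricing for an Azure deployment
  - model_name: azure-model
    litellm_params:
      model: azure/<your_deployment_name>
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
      api_version: os.environ/AZURE_API_VERSION
      input_cost_per_token: 0.005       # illustrative values from the SDK example
      output_cost_per_token: 1
```

The proxy is then started with `litellm /path/to/config.yaml` (the command visible in the hunk context above), so that cost tracking uses the registered prices instead of the defaults from the model cost map.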
**`docs/my-website/docs/sdk_custom_pricing.md`** (new file, 65 lines)
````markdown
# Custom Pricing - SageMaker, Azure, etc

Register custom pricing for a SageMaker completion model.

For cost per second pricing, you **just** need to register `input_cost_per_second`.

```python
# !pip install boto3
import os

from litellm import completion, completion_cost

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""


def test_completion_sagemaker():
    try:
        print("testing sagemaker")
        response = completion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            input_cost_per_second=0.000420,  # custom price per second of inference
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
```

## Cost Per Token (e.g. Azure)

For cost per token pricing, register `input_cost_per_token` and `output_cost_per_token`.

```python
import os

from litellm import completion, completion_cost

## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""


def test_completion_azure_model():
    try:
        print("testing azure custom pricing")
        # azure call
        response = completion(
            model="azure/<your_deployment_name>",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            input_cost_per_token=0.005,
            output_cost_per_token=1,
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")


test_completion_azure_model()
```
````
**`docs/my-website/sidebars.js`**

````diff
@@ -42,6 +42,7 @@ const sidebars = {
 "proxy/configs",
 "proxy/reliability",
 "proxy/cost_tracking",
+"proxy/custom_pricing",
 "proxy/self_serve",
 "proxy/virtual_keys",
 {
@@ -183,7 +184,6 @@ const sidebars = {
 "anthropic_completion"
 ],
 },
-"proxy/custom_pricing",
 {
 type: "category",
 label: "Secret Manager",
@@ -199,6 +199,8 @@ const sidebars = {
 "routing",
 "scheduler",
 "set_keys",
+"completion/token_usage",
+"sdk_custom_pricing",
 "budget_manager",
 "caching/all_caches",
 {
@@ -208,7 +210,6 @@ const sidebars = {
 },
 ],
 },
-"completion/token_usage",
 "load_test",
 {
 type: "category",
````