From 6faeac4cf6866dcc342e217710b155079f7f755b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:47:22 -0700 Subject: [PATCH] organize docs --- docs/my-website/docs/proxy/custom_pricing.md | 69 ++------------------ docs/my-website/docs/sdk_custom_pricing.md | 65 ++++++++++++++++++ docs/my-website/sidebars.js | 5 +- 3 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 docs/my-website/docs/sdk_custom_pricing.md diff --git a/docs/my-website/docs/proxy/custom_pricing.md b/docs/my-website/docs/proxy/custom_pricing.md index 0b747f119..51634021b 100644 --- a/docs/my-website/docs/proxy/custom_pricing.md +++ b/docs/my-website/docs/proxy/custom_pricing.md @@ -1,6 +1,6 @@ import Image from '@theme/IdealImage'; -# Custom Pricing - Sagemaker, etc. +# Custom LLM Pricing - Sagemaker, Azure, etc Use this to register custom pricing for models. @@ -16,39 +16,9 @@ LiteLLM already has pricing for any model in our [model cost map](https://github ::: -## Quick Start +## Cost Per Second (e.g. Sagemaker) -Register custom pricing for sagemaker completion model. - -For cost per second pricing, you **just** need to register `input_cost_per_second`. 
- -```python -# !pip install boto3 -from litellm import completion, completion_cost - -os.environ["AWS_ACCESS_KEY_ID"] = "" -os.environ["AWS_SECRET_ACCESS_KEY"] = "" -os.environ["AWS_REGION_NAME"] = "" - - -def test_completion_sagemaker(): - try: - print("testing sagemaker") - response = completion( - model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", - messages=[{"role": "user", "content": "Hey, how's it going?"}], - input_cost_per_second=0.000420, - ) - # Add any assertions here to check the response - print(response) - cost = completion_cost(completion_response=response) - print(cost) - except Exception as e: - raise Exception(f"Error occurred: {e}") - -``` - -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server **Step 1: Add pricing to config.yaml** ```yaml @@ -75,38 +45,7 @@ litellm /path/to/config.yaml ## Cost Per Token (e.g. Azure) - -```python -# !pip install boto3 -from litellm import completion, completion_cost - -## set ENV variables -os.environ["AZURE_API_KEY"] = "" -os.environ["AZURE_API_BASE"] = "" -os.environ["AZURE_API_VERSION"] = "" - - -def test_completion_azure_model(): - try: - print("testing azure custom pricing") - # azure call - response = completion( - model = "azure/", - messages = [{ "content": "Hello, how are you?","role": "user"}] - input_cost_per_token=0.005, - output_cost_per_token=1, - ) - # Add any assertions here to check the response - print(response) - cost = completion_cost(completion_response=response) - print(cost) - except Exception as e: - raise Exception(f"Error occurred: {e}") - -test_completion_azure_model() -``` - -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server ```yaml model_list: diff --git a/docs/my-website/docs/sdk_custom_pricing.md b/docs/my-website/docs/sdk_custom_pricing.md new file mode 100644 index 000000000..c85771151 --- /dev/null +++ b/docs/my-website/docs/sdk_custom_pricing.md @@ -0,0 +1,65 @@ +# Custom Pricing - SageMaker, Azure, etc + +Register custom 
pricing for sagemaker completion model. + +For cost per second pricing, you **just** need to register `input_cost_per_second`. + +```python +# !pip install boto3 +from litellm import completion, completion_cost + +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + + +def test_completion_sagemaker(): + try: + print("testing sagemaker") + response = completion( + model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + input_cost_per_second=0.000420, + ) + # Add any assertions here to check the response + print(response) + cost = completion_cost(completion_response=response) + print(cost) + except Exception as e: + raise Exception(f"Error occurred: {e}") + +``` + + +## Cost Per Token (e.g. Azure) + + +```python +# !pip install boto3 +from litellm import completion, completion_cost + +## set ENV variables +os.environ["AZURE_API_KEY"] = "" +os.environ["AZURE_API_BASE"] = "" +os.environ["AZURE_API_VERSION"] = "" + + +def test_completion_azure_model(): + try: + print("testing azure custom pricing") + # azure call + response = completion( + model = "azure/", + messages = [{ "content": "Hello, how are you?","role": "user"}], + input_cost_per_token=0.005, + output_cost_per_token=1, + ) + # Add any assertions here to check the response + print(response) + cost = completion_cost(completion_response=response) + print(cost) + except Exception as e: + raise Exception(f"Error occurred: {e}") + +test_completion_azure_model() +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index afb778373..6674d91ac 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -42,6 +42,7 @@ const sidebars = { "proxy/configs", "proxy/reliability", "proxy/cost_tracking", + "proxy/custom_pricing", "proxy/self_serve", "proxy/virtual_keys", { @@ -183,7 +184,6 @@ const sidebars = { 
"anthropic_completion" ], }, - "proxy/custom_pricing", { type: "category", label: "Secret Manager", @@ -199,6 +199,8 @@ const sidebars = { "routing", "scheduler", "set_keys", + "completion/token_usage", + "sdk_custom_pricing", "budget_manager", "caching/all_caches", { @@ -208,7 +210,6 @@ const sidebars = { }, ], }, - "completion/token_usage", "load_test", { type: "category",