diff --git a/docs/my-website/docs/proxy/embedding.md b/docs/my-website/docs/proxy/embedding.md
new file mode 100644
index 000000000..ed724beed
--- /dev/null
+++ b/docs/my-website/docs/proxy/embedding.md
@@ -0,0 +1,42 @@
+# Embeddings
+Route between SageMaker, Bedrock, and Azure embeddings
+
+
+Here's how to route between a GPT-J embedding model (SageMaker endpoint), Amazon Titan embeddings (Bedrock), and an Azure OpenAI embedding model on the proxy server:
+
+1. Save the models in your config.yaml
+```yaml
+model_list:
+  - model_name: sagemaker-embeddings
+    litellm_params:
+      model: "sagemaker/berri-benchmarking-gpt-j-6b-fp16"
+  - model_name: amazon-embeddings
+    litellm_params:
+      model: "bedrock/amazon.titan-embed-text-v1"
+  - model_name: azure-embeddings
+    litellm_params:
+      model: "azure/azure-embedding-model"
+      api_base: "os.environ/AZURE_API_BASE" # os.getenv("AZURE_API_BASE")
+      api_key: "os.environ/AZURE_API_KEY" # os.getenv("AZURE_API_KEY")
+      api_version: "2023-07-01-preview"
+
+general_settings:
+  master_key: sk-1234 # [OPTIONAL] if set, all calls to the proxy require either this key or a valid generated token
+```
+
+2. Start the proxy
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+3. Test the embedding call
+
+```shell
+curl --location 'http://0.0.0.0:8000/v1/embeddings' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data '{
+  "input": "The food was delicious and the waiter..",
+  "model": "sagemaker-embeddings"
+}'
+```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 11f81fa4d..12fa7ec9b 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -96,7 +96,8 @@ const sidebars = {
       },
       items: [
         "proxy/quick_start",
-        "proxy/configs",
+        "proxy/configs",
+        "proxy/embedding",
         "proxy/load_balancing",
         "proxy/virtual_keys",
         "proxy/model_management",
diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py
index a2258a170..73bb913c9 100644
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -127,7 +127,6 @@ class CompletionCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/obse
             assert isinstance(kwargs['original_response'], (str, litellm.CustomStreamWrapper))
             assert isinstance(kwargs['additional_args'], (dict, type(None)))
             assert isinstance(kwargs['log_event_type'], str)
-            assert isinstance(kwargs["cache_hit"], Optional[bool])
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
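
Since the proxy exposes an OpenAI-compatible `/v1/embeddings` route, the curl test in the new doc can also be run through the `openai` Python SDK. Below is a minimal sketch, not part of the diff above, assuming the proxy from step 2 is running on `http://0.0.0.0:8000` with the `sk-1234` master key from the config and the `openai` package (v1+) installed:

```python
# Minimal sketch (assumptions: openai>=1.0 installed, proxy running per step 2).
# The client is pointed at the LiteLLM proxy instead of api.openai.com; the
# model name is one of the model_list aliases from config.yaml.
import openai

client = openai.OpenAI(
    api_key="sk-1234",                  # master_key from general_settings
    base_url="http://0.0.0.0:8000/v1",  # proxy address + /v1, matching the curl example
)

response = client.embeddings.create(
    model="sagemaker-embeddings",       # or "amazon-embeddings" / "azure-embeddings"
    input="The food was delicious and the waiter..",
)

print(len(response.data[0].embedding))  # dimensionality of the returned vector
```

Swapping the `model` value is all it takes to route the same request to Bedrock or Azure instead, since the proxy resolves the alias against `model_list`.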