docs(embedding.md): add embedding docs to proxy

This commit is contained in:
Krrish Dholakia 2023-12-13 18:58:46 -08:00
parent 8d688b6217
commit 73ecc012a9
3 changed files with 44 additions and 2 deletions

View file

@ -0,0 +1,42 @@
# Embeddings
Route between SageMaker, Bedrock, and Azure embeddings
Here's how to route between GPT-J embedding (sagemaker endpoint), Amazon Titan embedding (Bedrock) and Azure OpenAI embedding on the proxy server:
1. Save them in your config.yaml
```yaml
model_list:
- model_name: sagemaker-embeddings
litellm_params:
model: "sagemaker/berri-benchmarking-gpt-j-6b-fp16"
- model_name: amazon-embeddings
litellm_params:
model: "bedrock/amazon.titan-embed-text-v1"
- model_name: azure-embeddings
litellm_params:
model: "azure/azure-embedding-model"
api_base: "os.environ/AZURE_API_BASE" # os.getenv("AZURE_API_BASE")
api_key: "os.environ/AZURE_API_KEY" # os.getenv("AZURE_API_KEY")
api_version: "2023-07-01-preview"
general_settings:
master_key: sk-1234 # [OPTIONAL] if set all calls to proxy will require either this key or a valid generated token
```
2. Start the proxy
```shell
$ litellm --config /path/to/config.yaml
```
3. Test the embedding call
```shell
curl --location 'http://0.0.0.0:8000/v1/embeddings' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"input": "The food was delicious and the waiter..",
"model": "sagemaker-embeddings"
}'
```

View file

@ -97,6 +97,7 @@ const sidebars = {
items: [ items: [
"proxy/quick_start", "proxy/quick_start",
"proxy/configs", "proxy/configs",
"proxy/embedding",
"proxy/load_balancing", "proxy/load_balancing",
"proxy/virtual_keys", "proxy/virtual_keys",
"proxy/model_management", "proxy/model_management",

View file

@ -127,7 +127,6 @@ class CompletionCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/obse
assert isinstance(kwargs['original_response'], (str, litellm.CustomStreamWrapper)) assert isinstance(kwargs['original_response'], (str, litellm.CustomStreamWrapper))
assert isinstance(kwargs['additional_args'], (dict, type(None))) assert isinstance(kwargs['additional_args'], (dict, type(None)))
assert isinstance(kwargs['log_event_type'], str) assert isinstance(kwargs['log_event_type'], str)
assert isinstance(kwargs["cache_hit"], Optional[bool])
except: except:
print(f"Assertion Error: {traceback.format_exc()}") print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc()) self.errors.append(traceback.format_exc())