forked from phoenix/litellm-mirror
docs(embedding.md): add embedding docs to proxy
parent 8d688b6217
commit 73ecc012a9
3 changed files with 44 additions and 2 deletions
docs/my-website/docs/proxy/embedding.md (new file, 42 lines)
@@ -0,0 +1,42 @@
# Embeddings

Route between Sagemaker, Bedrock, and Azure embeddings.

Here's how to route between GPT-J embedding (Sagemaker endpoint), Amazon Titan embedding (Bedrock), and Azure OpenAI embedding on the proxy server:

1. Save them in your config.yaml

```yaml
model_list:
  - model_name: sagemaker-embeddings
    litellm_params:
      model: "sagemaker/berri-benchmarking-gpt-j-6b-fp16"
  - model_name: amazon-embeddings
    litellm_params:
      model: "bedrock/amazon.titan-embed-text-v1"
  - model_name: azure-embeddings
    litellm_params:
      model: "azure/azure-embedding-model"
      api_base: "os.environ/AZURE_API_BASE" # os.getenv("AZURE_API_BASE")
      api_key: "os.environ/AZURE_API_KEY" # os.getenv("AZURE_API_KEY")
      api_version: "2023-07-01-preview"

general_settings:
  master_key: sk-1234 # [OPTIONAL] if set, all calls to the proxy will require either this key or a valid generated token
```

2. Start the proxy

```shell
$ litellm --config /path/to/config.yaml
```

3. Test the embedding call

```shell
curl --location 'http://0.0.0.0:8000/v1/embeddings' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
  "input": "The food was delicious and the waiter..",
  "model": "sagemaker-embeddings"
}'
```
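
Since the proxy exposes an OpenAI-compatible `/v1/embeddings` route, the same request works from any HTTP client. A minimal Python sketch, assuming the proxy from step 2 is running locally and the `master_key` is `sk-1234` as in the config above; swap `model` to `amazon-embeddings` or `azure-embeddings` to route to the other providers:

```python
# Minimal sketch: call the proxy's OpenAI-compatible embeddings route.
import requests

response = requests.post(
    "http://0.0.0.0:8000/v1/embeddings",
    headers={
        "Authorization": "Bearer sk-1234",  # master_key from config.yaml
        "Content-Type": "application/json",
    },
    json={
        "input": "The food was delicious and the waiter..",
        "model": "sagemaker-embeddings",  # or "amazon-embeddings" / "azure-embeddings"
    },
)
response.raise_for_status()
print(response.json()["data"][0]["embedding"][:5])  # first few dimensions of the vector
```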

@@ -96,7 +96,8 @@ const sidebars = {
       },
       items: [
         "proxy/quick_start",
         "proxy/configs",
+        "proxy/embedding",
         "proxy/load_balancing",
         "proxy/virtual_keys",
         "proxy/model_management",

@@ -127,7 +127,6 @@ class CompletionCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/obse
             assert isinstance(kwargs['original_response'], (str, litellm.CustomStreamWrapper))
             assert isinstance(kwargs['additional_args'], (dict, type(None)))
             assert isinstance(kwargs['log_event_type'], str)
-            assert isinstance(kwargs["cache_hit"], Optional[bool])
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
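
Side note on the removed assertion: `isinstance()` rejects subscripted generics such as `typing.Optional[bool]` with a `TypeError`, so that line could never pass; removing it (or rewriting it with concrete classes) is the fix. A hypothetical standalone sketch of the failure and two working equivalents, not part of this commit:

```python
from typing import Optional  # Optional[bool] is Union[bool, None], a typing construct

kwargs = {"cache_hit": None}  # hypothetical stand-in for the logger's kwargs

# The removed line fails before it can check anything:
#   isinstance(kwargs["cache_hit"], Optional[bool])
#   -> TypeError: Subscripted generics cannot be used with class and instance checks

# Working equivalents built from concrete classes:
assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
assert isinstance(kwargs["cache_hit"], (bool, type(None)))
```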