diff --git a/docs/my-website/docs/completion/input.md b/docs/my-website/docs/completion/input.md
index 5e2bd6079..c5988940d 100644
--- a/docs/my-website/docs/completion/input.md
+++ b/docs/my-website/docs/completion/input.md
@@ -229,399 +229,3 @@ def completion(
 - `hf_model_name`: *string (optional)* - [Sagemaker Only] The corresponding huggingface name of the model, used to pull the right chat template for the model.
-
-## Provider-specific Params
-Providers might offer params not supported by OpenAI (e.g. top_k). You can pass those in 2 ways:
-- via completion(): We'll pass the non-openai param, straight to the provider as part of the request body.
-    - e.g. `completion(model="claude-instant-1", top_k=3)`
-- via provider-specific config variable (e.g. `litellm.OpenAIConfig()`).
-
-
-```python
-import litellm, os
-
-# set env variables
-os.environ["OPENAI_API_KEY"] = "your-openai-key"
-
-## SET MAX TOKENS - via completion()
-response_1 = litellm.completion(
-    model="gpt-3.5-turbo",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    max_tokens=10
-)
-
-response_1_text = response_1.choices[0].message.content
-
-## SET MAX TOKENS - via config
-litellm.OpenAIConfig(max_tokens=10)
-
-response_2 = litellm.completion(
-    model="gpt-3.5-turbo",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-)
-
-response_2_text = response_2.choices[0].message.content
-
-## TEST OUTPUT
-assert len(response_2_text) > len(response_1_text)
-```
-
-
-```python
-import litellm, os
-
-# set env variables
-os.environ["OPENAI_API_KEY"] = "your-openai-key"
-
-
-## SET MAX TOKENS - via completion()
-response_1 = litellm.completion(
-    model="text-davinci-003",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    max_tokens=10
-)
-
-response_1_text = response_1.choices[0].message.content
-
-## SET MAX TOKENS - via config
-litellm.OpenAITextCompletionConfig(max_tokens=10)
-response_2 = litellm.completion(
-    model="text-davinci-003",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-)
-
-response_2_text = response_2.choices[0].message.content
-
-## TEST OUTPUT
-assert len(response_2_text) > len(response_1_text)
-```
-
-
-```python
-import litellm, os
-
-# set env variables
-os.environ["AZURE_API_BASE"] = "your-azure-api-base"
-os.environ["AZURE_API_TYPE"] = "azure" # [OPTIONAL]
-os.environ["AZURE_API_VERSION"] = "2023-07-01-preview" # [OPTIONAL]
-
-## SET MAX TOKENS - via completion()
-response_1 = litellm.completion(
-    model="azure/chatgpt-v-2",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    max_tokens=10
-)
-
-response_1_text = response_1.choices[0].message.content
-
-## SET MAX TOKENS - via config
-litellm.AzureOpenAIConfig(max_tokens=10)
-response_2 = litellm.completion(
-    model="azure/chatgpt-v-2",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-)
-
-response_2_text = response_2.choices[0].message.content
-
-## TEST OUTPUT
-assert len(response_2_text) > len(response_1_text)
-```
-
-
-```python
-import litellm, os
-
-# set env variables
-os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
-
-## SET MAX TOKENS - via completion()
-response_1 = litellm.completion(
-    model="claude-instant-1",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    max_tokens=10
-)
-
-response_1_text = response_1.choices[0].message.content
-
-## SET MAX TOKENS - via config
-litellm.AnthropicConfig(max_tokens_to_sample=200)
-response_2 = litellm.completion(
-    model="claude-instant-1",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
"Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - -```python -import litellm, os - -# set env variables -os.environ["HUGGINGFACE_API_KEY"] = "your-huggingface-key" #[OPTIONAL] - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://your-huggingface-api-endpoint", - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.HuggingfaceConfig(max_new_tokens=200) -response_2 = litellm.completion( - model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://your-huggingface-api-endpoint" - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - - -```python -import litellm, os - -# set env variables -os.environ["TOGETHERAI_API_KEY"] = "your-togetherai-key" - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="together_ai/togethercomputer/llama-2-70b-chat", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.TogetherAIConfig(max_tokens_to_sample=200) -response_2 = litellm.completion( - model="together_ai/togethercomputer/llama-2-70b-chat", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - -```python -import litellm, os - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="ollama/llama2", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.OllamConfig(num_predict=200) -response_2 = litellm.completion( - model="ollama/llama2", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - -```python -import litellm, os - -# set env variables -os.environ["REPLICATE_API_KEY"] = "your-replicate-key" - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.ReplicateConfig(max_new_tokens=200) -response_2 = litellm.completion( - model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - - -```python -import litellm - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="petals/petals-team/StableBeluga2", - messages=[{ "content": "Hello, how are you?","role": 
"user"}], - api_base="https://chat.petals.dev/api/v1/generate", - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.PetalsConfig(max_new_tokens=10) -response_2 = litellm.completion( - model="petals/petals-team/StableBeluga2", - messages=[{ "content": "Hello, how are you?","role": "user"}], - api_base="https://chat.petals.dev/api/v1/generate", - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - -```python -import litellm, os - -# set env variables -os.environ["PALM_API_KEY"] = "your-palm-key" - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="palm/chat-bison", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.PalmConfig(maxOutputTokens=10) -response_2 = litellm.completion( - model="palm/chat-bison", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - -```python -import litellm, os - -# set env variables -os.environ["AI21_API_KEY"] = "your-ai21-key" - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="j2-mid", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.AI21Config(maxOutputTokens=10) -response_2 = litellm.completion( - model="j2-mid", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - -```python -import litellm, os - -# set env variables -os.environ["COHERE_API_KEY"] = "your-cohere-key" - -## SET MAX TOKENS - via completion() -response_1 = litellm.completion( - model="command-nightly", - messages=[{ "content": "Hello, how are you?","role": "user"}], - max_tokens=10 - ) - -response_1_text = response_1.choices[0].message.content - -## SET MAX TOKENS - via config -litellm.CohereConfig(max_tokens=200) -response_2 = litellm.completion( - model="command-nightly", - messages=[{ "content": "Hello, how are you?","role": "user"}], - ) - -response_2_text = response_2.choices[0].message.content - -## TEST OUTPUT -assert len(response_2_text) > len(response_1_text) -``` - - - - - - -[**Check out the tutorial!**](../tutorials/provider_specific_params.md) diff --git a/docs/my-website/docs/completion/provider_specific_params.md b/docs/my-website/docs/completion/provider_specific_params.md new file mode 100644 index 000000000..a8307fc8a --- /dev/null +++ b/docs/my-website/docs/completion/provider_specific_params.md @@ -0,0 +1,436 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Provider-specific Params + +Providers might offer params not supported by OpenAI (e.g. top_k). LiteLLM treats any non-openai param, as a provider-specific param, and passes it to the provider in the request body, as a kwarg. [**See Reserved Params**](https://github.com/BerriAI/litellm/blob/aa2fd29e48245f360e771a8810a69376464b195e/litellm/main.py#L700) + +You can pass those in 2 ways: +- via completion(): We'll pass the non-openai param, straight to the provider as part of the request body. 
+    - e.g. `completion(model="claude-instant-1", top_k=3)`
+- via provider-specific config variable (e.g. `litellm.OpenAIConfig()`).
+
+## SDK Usage
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["OPENAI_API_KEY"] = "your-openai-key"
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.OpenAIConfig(max_tokens=10)
+
+response_2 = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["OPENAI_API_KEY"] = "your-openai-key"
+
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="text-davinci-003",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.OpenAITextCompletionConfig(max_tokens=10)
+response_2 = litellm.completion(
+    model="text-davinci-003",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["AZURE_API_BASE"] = "your-azure-api-base"
+os.environ["AZURE_API_TYPE"] = "azure" # [OPTIONAL]
+os.environ["AZURE_API_VERSION"] = "2023-07-01-preview" # [OPTIONAL]
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="azure/chatgpt-v-2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.AzureOpenAIConfig(max_tokens=10)
+response_2 = litellm.completion(
+    model="azure/chatgpt-v-2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="claude-instant-1",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.AnthropicConfig(max_tokens_to_sample=200)
+response_2 = litellm.completion(
+    model="claude-instant-1",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "your-huggingface-key" #[OPTIONAL]
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    api_base="https://your-huggingface-api-endpoint",
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.HuggingfaceConfig(max_new_tokens=200)
+response_2 = litellm.completion(
+    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    api_base="https://your-huggingface-api-endpoint"
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["TOGETHERAI_API_KEY"] = "your-togetherai-key"
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="together_ai/togethercomputer/llama-2-70b-chat",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.TogetherAIConfig(max_tokens_to_sample=200)
+response_2 = litellm.completion(
+    model="together_ai/togethercomputer/llama-2-70b-chat",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="ollama/llama2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.OllamaConfig(num_predict=200)
+response_2 = litellm.completion(
+    model="ollama/llama2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["REPLICATE_API_KEY"] = "your-replicate-key"
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.ReplicateConfig(max_new_tokens=200)
+response_2 = litellm.completion(
+    model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm
+
+## SET MAX TOKENS - via completion()
+response_1 = litellm.completion(
+    model="petals/petals-team/StableBeluga2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    api_base="https://chat.petals.dev/api/v1/generate",
+    max_tokens=10
+)
+
+response_1_text = response_1.choices[0].message.content
+
+## SET MAX TOKENS - via config
+litellm.PetalsConfig(max_new_tokens=10)
+response_2 = litellm.completion(
+    model="petals/petals-team/StableBeluga2",
+    messages=[{ "content": "Hello, how are you?","role": "user"}],
+    api_base="https://chat.petals.dev/api/v1/generate",
+)
+
+response_2_text = response_2.choices[0].message.content
+
+## TEST OUTPUT
+assert len(response_2_text) > len(response_1_text)
+```
+
+
+```python
+import litellm, os
+
+# set env variables
+os.environ["PALM_API_KEY"] = "your-palm-key"
"your-palm-key" + +## SET MAX TOKENS - via completion() +response_1 = litellm.completion( + model="palm/chat-bison", + messages=[{ "content": "Hello, how are you?","role": "user"}], + max_tokens=10 + ) + +response_1_text = response_1.choices[0].message.content + +## SET MAX TOKENS - via config +litellm.PalmConfig(maxOutputTokens=10) +response_2 = litellm.completion( + model="palm/chat-bison", + messages=[{ "content": "Hello, how are you?","role": "user"}], + ) + +response_2_text = response_2.choices[0].message.content + +## TEST OUTPUT +assert len(response_2_text) > len(response_1_text) +``` + + + + +```python +import litellm, os + +# set env variables +os.environ["AI21_API_KEY"] = "your-ai21-key" + +## SET MAX TOKENS - via completion() +response_1 = litellm.completion( + model="j2-mid", + messages=[{ "content": "Hello, how are you?","role": "user"}], + max_tokens=10 + ) + +response_1_text = response_1.choices[0].message.content + +## SET MAX TOKENS - via config +litellm.AI21Config(maxOutputTokens=10) +response_2 = litellm.completion( + model="j2-mid", + messages=[{ "content": "Hello, how are you?","role": "user"}], + ) + +response_2_text = response_2.choices[0].message.content + +## TEST OUTPUT +assert len(response_2_text) > len(response_1_text) +``` + + + + + +```python +import litellm, os + +# set env variables +os.environ["COHERE_API_KEY"] = "your-cohere-key" + +## SET MAX TOKENS - via completion() +response_1 = litellm.completion( + model="command-nightly", + messages=[{ "content": "Hello, how are you?","role": "user"}], + max_tokens=10 + ) + +response_1_text = response_1.choices[0].message.content + +## SET MAX TOKENS - via config +litellm.CohereConfig(max_tokens=200) +response_2 = litellm.completion( + model="command-nightly", + messages=[{ "content": "Hello, how are you?","role": "user"}], + ) + +response_2_text = response_2.choices[0].message.content + +## TEST OUTPUT +assert len(response_2_text) > len(response_1_text) +``` + + + + + + +[**Check out the tutorial!**](../tutorials/provider_specific_params.md) + + +## Proxy Usage + +**via Config** + +```yaml +model_list: + - model_name: llama-3-8b-instruct + litellm_params: + model: predibase/llama-3-8b-instruct + api_key: os.environ/PREDIBASE_API_KEY + tenant_id: os.environ/PREDIBASE_TENANT_ID + max_tokens: 256 + adapter_base: # 👈 PROVIDER-SPECIFIC PARAM +``` + +**via Request** + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-D '{ + "model": "llama-3-8b-instruct", + "messages": [ + { + "role": "user", + "content": "What'\''s the weather like in Boston today?" + } + ], + "adapater_id": "my-special-adapter-id" # 👈 PROVIDER-SPECIFIC PARAM + }' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 3f52111bd..b1853bc18 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -90,6 +90,7 @@ const sidebars = { }, items: [ "completion/input", + "completion/provider_specific_params", "completion/drop_params", "completion/prompt_formatting", "completion/output",