forked from phoenix/litellm-mirror
updates TG_AI
This commit is contained in:
parent
b077110da5
commit
ba6df5fbb9
6 changed files with 51 additions and 33 deletions
|
@ -1,4 +1,10 @@
|
||||||
# Generation/Completion/Chat Completion Models
|
# Supported Chat, Completion Models
|
||||||
|
|
||||||
|
## API Keys
|
||||||
|
liteLLM reads keys set in the environment variables or your Key Manager
|
||||||
|
liteLLM standardizes naming keys in the following format
|
||||||
|
`PROVIDER_API_KEY` for example `OPENAI_API_KEY` or `TOGETHERAI_API_KEY` or `HUGGINGFACE_API_KEY`. In addition to this liteLLM also allows you to use the provider specificed naming conventions for keys
|
||||||
|
Example Both `HF_TOKEN` and `HUGGINGFACE_API_KEY` will work for Hugging Face models
|
||||||
|
|
||||||
### OpenAI Chat Completion Models
|
### OpenAI Chat Completion Models
|
||||||
|
|
||||||
|
@ -49,6 +55,7 @@ VertexAI requires you to set `application_default_credentials.json`, this can be
|
||||||
| Model Name | Function Call | Required OS Variables |
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|------------------|--------------------------------------------|--------------------------------------|
|
|------------------|--------------------------------------------|--------------------------------------|
|
||||||
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
|
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
|
||||||
|
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
|
||||||
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
|
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
|
||||||
|
|
||||||
### Hugging Face Inference API
|
### Hugging Face Inference API
|
||||||
|
@ -64,10 +71,10 @@ Here are some examples of supported models:
|
||||||
|
|
||||||
| Model Name | Function Call | Required OS Variables |
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
|
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
|
||||||
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
|
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
||||||
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
|
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
||||||
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
|
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
||||||
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
|
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
||||||
### AI21 Models
|
### AI21 Models
|
||||||
|
|
||||||
| Model Name | Function Call | Required OS Variables |
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|
@ -82,10 +89,23 @@ Here are some examples of supported models:
|
||||||
|------------------|--------------------------------------------|--------------------------------------|
|
|------------------|--------------------------------------------|--------------------------------------|
|
||||||
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
|
||||||
|
|
||||||
### BaseTen Models
|
### Together AI Models
|
||||||
|
liteLLM supports `non-streaming` and `streaming` requests to all models on https://api.together.xyz/
|
||||||
|
|
||||||
|
# Example TogetherAI Usage - Note: liteLLM supports all models deployed on TogetherAI
|
||||||
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
|
||||||
|
| togethercomputer/llama-2-70b-chat | `completion('togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
|
||||||
|
| togethercomputer/LLaMA-2-13b-chat | `completion('togethercomputer/LLaMA-2-13b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
|
||||||
|
| togethercomputer/code-and-talk-v1 | `completion('togethercomputer/code-and-talk-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
|
||||||
|
| togethercomputer/creative-v1 | `completion('togethercomputer/creative-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
|
||||||
|
| togethercomputer/yourmodel | `completion('togethercomputer/yourmodel', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
|
||||||
|
|
||||||
|
|
||||||
|
### Baseten Models
|
||||||
Baseten provides infrastructure to deploy and serve ML models https://www.baseten.co/. Use liteLLM to easily call models deployed on Baseten.
|
Baseten provides infrastructure to deploy and serve ML models https://www.baseten.co/. Use liteLLM to easily call models deployed on Baseten.
|
||||||
|
|
||||||
|
# Example Baseten Usage - Note: liteLLM supports all models deployed on Basten
|
||||||
| Model Name | Function Call | Required OS Variables |
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|------------------|--------------------------------------------|------------------------------------|
|
|------------------|--------------------------------------------|------------------------------------|
|
||||||
| Falcon 7B | `completion(model='<your model version id>', messages=messages, custom_llm_provider="baseten")` | `os.environ['BASETEN_API_KEY']` |
|
| Falcon 7B | `completion(model='<your model version id>', messages=messages, custom_llm_provider="baseten")` | `os.environ['BASETEN_API_KEY']` |
|
||||||
|
@ -99,13 +119,13 @@ All the text models from [OpenRouter](https://openrouter.ai/docs) are supported
|
||||||
|
|
||||||
| Model Name | Function Call | Required OS Variables |
|
| Model Name | Function Call | Required OS Variables |
|
||||||
|------------------|--------------------------------------------|--------------------------------------|
|
|------------------|--------------------------------------------|--------------------------------------|
|
||||||
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
||||||
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
|
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
|
|
@ -80,35 +80,27 @@ const config = {
|
||||||
{
|
{
|
||||||
title: 'Community',
|
title: 'Community',
|
||||||
items: [
|
items: [
|
||||||
{
|
|
||||||
label: 'Stack Overflow',
|
|
||||||
href: 'https://stackoverflow.com/questions/tagged/docusaurus',
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
label: 'Discord',
|
label: 'Discord',
|
||||||
href: 'https://discordapp.com/invite/docusaurus',
|
href: 'https://discord.com/invite/wuPM9dRgDw',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Twitter',
|
label: 'Twitter',
|
||||||
href: 'https://twitter.com/docusaurus',
|
href: 'https://twitter.com/LiteLLM,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: 'More',
|
title: 'More',
|
||||||
items: [
|
items: [
|
||||||
{
|
|
||||||
label: 'Blog',
|
|
||||||
to: '/blog',
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
label: 'GitHub',
|
label: 'GitHub',
|
||||||
href: 'https://github.com/facebook/docusaurus',
|
href: 'https://github.com/BerriAI/litellm/',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
copyright: `Copyright © ${new Date().getFullYear()} My Project, Inc. Built with Docusaurus.`,
|
copyright: `Copyright © ${new Date().getFullYear()} liteLLM`,
|
||||||
},
|
},
|
||||||
prism: {
|
prism: {
|
||||||
theme: lightCodeTheme,
|
theme: lightCodeTheme,
|
||||||
|
|
|
@ -22,13 +22,18 @@ const sidebars = {
|
||||||
{
|
{
|
||||||
type: 'category',
|
type: 'category',
|
||||||
label: 'completion_function',
|
label: 'completion_function',
|
||||||
items: ['completion/input', 'completion/supported','completion/output'],
|
items: ['completion/input','completion/output'],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
type: 'category',
|
type: 'category',
|
||||||
label: 'embedding_function',
|
label: 'embedding_function',
|
||||||
items: ['embedding/supported_embedding'],
|
items: ['embedding/supported_embedding'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
type: 'category',
|
||||||
|
label: 'Supported Chat, Completion Models',
|
||||||
|
items: ['completion/supported'],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
type: 'category',
|
type: 'category',
|
||||||
label: 'Tutorials',
|
label: 'Tutorials',
|
||||||
|
|
|
@ -17,6 +17,7 @@ vertex_project = None
|
||||||
vertex_location = None
|
vertex_location = None
|
||||||
caching = False
|
caching = False
|
||||||
hugging_api_token = None
|
hugging_api_token = None
|
||||||
|
togetherai_api_key = None
|
||||||
model_cost = {
|
model_cost = {
|
||||||
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
|
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
|
||||||
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
|
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
|
||||||
|
|
|
@ -282,9 +282,9 @@ def completion(
|
||||||
response = CustomStreamWrapper(model_response, model, custom_llm_provider="huggingface")
|
response = CustomStreamWrapper(model_response, model, custom_llm_provider="huggingface")
|
||||||
return response
|
return response
|
||||||
response = model_response
|
response = model_response
|
||||||
elif custom_llm_provider == "together_ai":
|
elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
|
||||||
import requests
|
import requests
|
||||||
TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY")
|
TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY") or api_key or litellm.togetherai_api_key
|
||||||
headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
|
headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
|
||||||
endpoint = 'https://api.together.xyz/inference'
|
endpoint = 'https://api.together.xyz/inference'
|
||||||
prompt = " ".join([message["content"] for message in messages]) # TODO: Add chat support for together AI
|
prompt = " ".join([message["content"] for message in messages]) # TODO: Add chat support for together AI
|
||||||
|
|
|
@ -232,7 +232,7 @@ def test_completion_replicate_stability():
|
||||||
def test_completion_together_ai():
|
def test_completion_together_ai():
|
||||||
model_name = "togethercomputer/llama-2-70b-chat"
|
model_name = "togethercomputer/llama-2-70b-chat"
|
||||||
try:
|
try:
|
||||||
response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
|
response = completion(model=model_name, messages=messages)
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
print(response)
|
print(response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue