fix(proxy_server.py): return 'allowed-model-region' in headers

This commit is contained in:
Krrish Dholakia 2024-05-13 08:46:44 -07:00
parent 514c5737f8
commit c3293474dd
3 changed files with 80 additions and 64 deletions

View file

@@ -406,69 +406,69 @@ replicate_models: List = [
] ]
clarifai_models: List = [ clarifai_models: List = [
'clarifai/meta.Llama-3.Llama-3-8B-Instruct', "clarifai/meta.Llama-3.Llama-3-8B-Instruct",
'clarifai/gcp.generate.gemma-1_1-7b-it', "clarifai/gcp.generate.gemma-1_1-7b-it",
'clarifai/mistralai.completion.mixtral-8x22B', "clarifai/mistralai.completion.mixtral-8x22B",
'clarifai/cohere.generate.command-r-plus', "clarifai/cohere.generate.command-r-plus",
'clarifai/databricks.drbx.dbrx-instruct', "clarifai/databricks.drbx.dbrx-instruct",
'clarifai/mistralai.completion.mistral-large', "clarifai/mistralai.completion.mistral-large",
'clarifai/mistralai.completion.mistral-medium', "clarifai/mistralai.completion.mistral-medium",
'clarifai/mistralai.completion.mistral-small', "clarifai/mistralai.completion.mistral-small",
'clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1', "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1",
'clarifai/gcp.generate.gemma-2b-it', "clarifai/gcp.generate.gemma-2b-it",
'clarifai/gcp.generate.gemma-7b-it', "clarifai/gcp.generate.gemma-7b-it",
'clarifai/deci.decilm.deciLM-7B-instruct', "clarifai/deci.decilm.deciLM-7B-instruct",
'clarifai/mistralai.completion.mistral-7B-Instruct', "clarifai/mistralai.completion.mistral-7B-Instruct",
'clarifai/gcp.generate.gemini-pro', "clarifai/gcp.generate.gemini-pro",
'clarifai/anthropic.completion.claude-v1', "clarifai/anthropic.completion.claude-v1",
'clarifai/anthropic.completion.claude-instant-1_2', "clarifai/anthropic.completion.claude-instant-1_2",
'clarifai/anthropic.completion.claude-instant', "clarifai/anthropic.completion.claude-instant",
'clarifai/anthropic.completion.claude-v2', "clarifai/anthropic.completion.claude-v2",
'clarifai/anthropic.completion.claude-2_1', "clarifai/anthropic.completion.claude-2_1",
'clarifai/meta.Llama-2.codeLlama-70b-Python', "clarifai/meta.Llama-2.codeLlama-70b-Python",
'clarifai/meta.Llama-2.codeLlama-70b-Instruct', "clarifai/meta.Llama-2.codeLlama-70b-Instruct",
'clarifai/openai.completion.gpt-3_5-turbo-instruct', "clarifai/openai.completion.gpt-3_5-turbo-instruct",
'clarifai/meta.Llama-2.llama2-7b-chat', "clarifai/meta.Llama-2.llama2-7b-chat",
'clarifai/meta.Llama-2.llama2-13b-chat', "clarifai/meta.Llama-2.llama2-13b-chat",
'clarifai/meta.Llama-2.llama2-70b-chat', "clarifai/meta.Llama-2.llama2-70b-chat",
'clarifai/openai.chat-completion.gpt-4-turbo', "clarifai/openai.chat-completion.gpt-4-turbo",
'clarifai/microsoft.text-generation.phi-2', "clarifai/microsoft.text-generation.phi-2",
'clarifai/meta.Llama-2.llama2-7b-chat-vllm', "clarifai/meta.Llama-2.llama2-7b-chat-vllm",
'clarifai/upstage.solar.solar-10_7b-instruct', "clarifai/upstage.solar.solar-10_7b-instruct",
'clarifai/openchat.openchat.openchat-3_5-1210', "clarifai/openchat.openchat.openchat-3_5-1210",
'clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B', "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B",
'clarifai/gcp.generate.text-bison', "clarifai/gcp.generate.text-bison",
'clarifai/meta.Llama-2.llamaGuard-7b', "clarifai/meta.Llama-2.llamaGuard-7b",
'clarifai/fblgit.una-cybertron.una-cybertron-7b-v2', "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2",
'clarifai/openai.chat-completion.GPT-4', "clarifai/openai.chat-completion.GPT-4",
'clarifai/openai.chat-completion.GPT-3_5-turbo', "clarifai/openai.chat-completion.GPT-3_5-turbo",
'clarifai/ai21.complete.Jurassic2-Grande', "clarifai/ai21.complete.Jurassic2-Grande",
'clarifai/ai21.complete.Jurassic2-Grande-Instruct', "clarifai/ai21.complete.Jurassic2-Grande-Instruct",
'clarifai/ai21.complete.Jurassic2-Jumbo-Instruct', "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct",
'clarifai/ai21.complete.Jurassic2-Jumbo', "clarifai/ai21.complete.Jurassic2-Jumbo",
'clarifai/ai21.complete.Jurassic2-Large', "clarifai/ai21.complete.Jurassic2-Large",
'clarifai/cohere.generate.cohere-generate-command', "clarifai/cohere.generate.cohere-generate-command",
'clarifai/wizardlm.generate.wizardCoder-Python-34B', "clarifai/wizardlm.generate.wizardCoder-Python-34B",
'clarifai/wizardlm.generate.wizardLM-70B', "clarifai/wizardlm.generate.wizardLM-70B",
'clarifai/tiiuae.falcon.falcon-40b-instruct', "clarifai/tiiuae.falcon.falcon-40b-instruct",
'clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat', "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat",
'clarifai/gcp.generate.code-gecko', "clarifai/gcp.generate.code-gecko",
'clarifai/gcp.generate.code-bison', "clarifai/gcp.generate.code-bison",
'clarifai/mistralai.completion.mistral-7B-OpenOrca', "clarifai/mistralai.completion.mistral-7B-OpenOrca",
'clarifai/mistralai.completion.openHermes-2-mistral-7B', "clarifai/mistralai.completion.openHermes-2-mistral-7B",
'clarifai/wizardlm.generate.wizardLM-13B', "clarifai/wizardlm.generate.wizardLM-13B",
'clarifai/huggingface-research.zephyr.zephyr-7B-alpha', "clarifai/huggingface-research.zephyr.zephyr-7B-alpha",
'clarifai/wizardlm.generate.wizardCoder-15B', "clarifai/wizardlm.generate.wizardCoder-15B",
'clarifai/microsoft.text-generation.phi-1_5', "clarifai/microsoft.text-generation.phi-1_5",
'clarifai/databricks.Dolly-v2.dolly-v2-12b', "clarifai/databricks.Dolly-v2.dolly-v2-12b",
'clarifai/bigcode.code.StarCoder', "clarifai/bigcode.code.StarCoder",
'clarifai/salesforce.xgen.xgen-7b-8k-instruct', "clarifai/salesforce.xgen.xgen-7b-8k-instruct",
'clarifai/mosaicml.mpt.mpt-7b-instruct', "clarifai/mosaicml.mpt.mpt-7b-instruct",
'clarifai/anthropic.completion.claude-3-opus', "clarifai/anthropic.completion.claude-3-opus",
'clarifai/anthropic.completion.claude-3-sonnet', "clarifai/anthropic.completion.claude-3-sonnet",
'clarifai/gcp.generate.gemini-1_5-pro', "clarifai/gcp.generate.gemini-1_5-pro",
'clarifai/gcp.generate.imagen-2', "clarifai/gcp.generate.imagen-2",
'clarifai/salesforce.blip.general-english-image-caption-blip-2', "clarifai/salesforce.blip.general-english-image-caption-blip-2",
] ]

View file

@@ -13,10 +13,10 @@ router_settings:
redis_host: redis redis_host: redis
# redis_password: <your redis password> # redis_password: <your redis password>
redis_port: 6379 redis_port: 6379
enable_pre_call_checks: true
litellm_settings: litellm_settings:
set_verbose: True set_verbose: True
enable_preview_features: true
# service_callback: ["prometheus_system"] # service_callback: ["prometheus_system"]
# success_callback: ["prometheus"] # success_callback: ["prometheus"]
# failure_callback: ["prometheus"] # failure_callback: ["prometheus"]

View file

@@ -3762,6 +3762,7 @@ async def chat_completion(
"x-litellm-cache-key": cache_key, "x-litellm-cache-key": cache_key,
"x-litellm-model-api-base": api_base, "x-litellm-model-api-base": api_base,
"x-litellm-version": version, "x-litellm-version": version,
"x-litellm-model-region": user_api_key_dict.allowed_model_region or "",
} }
selected_data_generator = select_data_generator( selected_data_generator = select_data_generator(
response=response, response=response,
@@ -3778,6 +3779,9 @@ async def chat_completion(
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version fastapi_response.headers["x-litellm-version"] = version
fastapi_response.headers["x-litellm-model-region"] = (
user_api_key_dict.allowed_model_region or ""
)
### CALL HOOKS ### - modify outgoing data ### CALL HOOKS ### - modify outgoing data
response = await proxy_logging_obj.post_call_success_hook( response = await proxy_logging_obj.post_call_success_hook(
@@ -4162,6 +4166,9 @@ async def embeddings(
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version fastapi_response.headers["x-litellm-version"] = version
fastapi_response.headers["x-litellm-model-region"] = (
user_api_key_dict.allowed_model_region or ""
)
return response return response
except Exception as e: except Exception as e:
@@ -4331,6 +4338,9 @@ async def image_generation(
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version fastapi_response.headers["x-litellm-version"] = version
fastapi_response.headers["x-litellm-model-region"] = (
user_api_key_dict.allowed_model_region or ""
)
return response return response
except Exception as e: except Exception as e:
@@ -4524,6 +4534,9 @@ async def audio_transcriptions(
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version fastapi_response.headers["x-litellm-version"] = version
fastapi_response.headers["x-litellm-model-region"] = (
user_api_key_dict.allowed_model_region or ""
)
return response return response
except Exception as e: except Exception as e:
@@ -4699,6 +4712,9 @@ async def moderations(
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version fastapi_response.headers["x-litellm-version"] = version
fastapi_response.headers["x-litellm-model-region"] = (
user_api_key_dict.allowed_model_region or ""
)
return response return response
except Exception as e: except Exception as e: