LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)

* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)

* feat(auth_utils.py): enable admin to allow client-side credentials to be passed

Makes it easier for devs to experiment with fine-tuned Fireworks AI models

* feat(router.py): allow setting configurable_clientside_auth_params for a model

Closes https://github.com/BerriAI/litellm/issues/5843
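
For reference, a minimal sketch of the router-level usage, assuming the param is accepted inside litellm_params (the "fireworks-llama" alias and model path are illustrative):

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "fireworks-llama",  # hypothetical alias
                "litellm_params": {
                    "model": "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",
                    # params a client may now override per-request
                    "configurable_clientside_auth_params": ["api_base", "api_key"],
                },
            }
        ]
    )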

* build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit

Fixes https://github.com/BerriAI/litellm/issues/5850
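
A quick way to sanity-check the corrected limit, assuming the fixed value is 8192 output tokens:

    import litellm

    info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
    print(info["max_output_tokens"])  # expected: 8192 after this fix (assumed value)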

* fix(azure_ai/): support content list for azure ai

Fixes https://github.com/BerriAI/litellm/issues/4237
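
A minimal sketch of the now-supported call shape (deployment name, endpoint URL, and key below are placeholders):

    import litellm

    # OpenAI-style content list, previously rejected by the azure_ai handler
    response = litellm.completion(
        model="azure_ai/mistral-large",  # hypothetical deployment name
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": "good morning from litellm"}],
            }
        ],
        api_base="https://my-deployment.eastus2.models.ai.azure.com",  # placeholder
        api_key="my-azure-ai-key",  # placeholder
    )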

* fix(litellm_logging.py): always set saved_cache_cost

Set to 0 by default

* fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing

Handles cost tracking when calling 405B+ parameter models
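
A hedged sketch of the cost lookup this enables (the 405B model path is illustrative; the point is that pricing falls back to a Fireworks AI default when the exact model has no cost-map entry):

    import litellm

    prompt_cost, completion_cost = litellm.cost_per_token(
        model="fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct",
        prompt_tokens=100,
        completion_tokens=50,
    )
    print(prompt_cost, completion_cost)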

* fix(slack_alerting.py): fix error alerting for failed spend tracking

Fixes regression with slack alerting error monitoring

* fix(vertex_and_google_ai_studio_gemini.py): handle gemini no candidates in streaming chunk error

* docs(bedrock.md): add llama3-1 models
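
For example (model ID follows Bedrock's llama3-1 naming; AWS region/credential setup omitted):

    import litellm

    response = litellm.completion(
        model="bedrock/meta.llama3-1-8b-instruct-v1:0",
        messages=[{"role": "user", "content": "good morning from litellm"}],
    )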

* test: fix tests

* fix(azure_ai/chat): fix transformation for azure ai calls

* feat(azure_ai/embed): Add azure ai embeddings support

Closes https://github.com/BerriAI/litellm/issues/5861
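
A minimal sketch, reusing the deployment name from the test below (the endpoint and env var are taken from that test):

    import os

    import litellm

    response = litellm.embedding(
        model="azure_ai/Cohere-embed-v3-multilingual-jzu",
        input=["good morning from litellm"],
        api_base="https://Cohere-embed-v3-multilingual-jzu.eastus2.models.ai.azure.com",
        api_key=os.getenv("AZURE_AI_COHERE_API_KEY_2"),
    )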

* fix(azure_ai/embed): enable async embedding

* feat(azure_ai/embed): support azure ai multimodal embeddings

* fix(azure_ai/embed): support async multimodal embeddings

* feat(together_ai/embed): support together ai embedding calls
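
For example, with the model exercised in the test below (assumes TOGETHERAI_API_KEY is set in the environment):

    import litellm

    response = litellm.embedding(
        model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval",
        input=["good morning from litellm"],
    )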

* feat(rerank/main.py): log source documents for rerank endpoints to langfuse

Improves rerank endpoint logging
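
A minimal sketch, assuming a Cohere rerank model and Langfuse credentials already configured via environment variables:

    import litellm

    litellm.success_callback = ["langfuse"]

    response = litellm.rerank(
        model="cohere/rerank-english-v3.0",
        query="what is litellm?",
        documents=["litellm is an llm gateway", "cats are cute"],
        top_n=1,
    )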

* fix(langfuse.py): support logging `/audio/speech` input to langfuse
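
A minimal sketch of the call whose input is now logged (assumes LANGFUSE_* env vars and an OpenAI key are set):

    import litellm

    litellm.success_callback = ["langfuse"]

    response = litellm.speech(
        model="openai/tts-1",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
    )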

* test(test_embedding.py): fix test

* test(test_completion_cost.py): fix helper util

test_embedding.py

@@ -104,14 +104,131 @@ def test_openai_embedding_3():
         pytest.fail(f"Error occurred: {e}")


-def test_openai_azure_embedding_simple():
+@pytest.mark.parametrize(
+    "model, api_base, api_key",
+    [
+        # ("azure/azure-embedding-model", None, None),
+        ("together_ai/togethercomputer/m2-bert-80M-8k-retrieval", None, None),
+    ],
+)
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_azure_embedding_simple(model, api_base, api_key, sync_mode):
     try:
-        litellm.set_verbose = True
-        response = embedding(
-            model="azure/azure-embedding-model",
-            input=["good morning from litellm"],
-        )
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+        # litellm.set_verbose = True
+        if sync_mode:
+            response = embedding(
+                model=model,
+                input=["good morning from litellm"],
+                api_base=api_base,
+                api_key=api_key,
+            )
+        else:
+            response = await litellm.aembedding(
+                model=model,
+                input=["good morning from litellm"],
+                api_base=api_base,
+                api_key=api_key,
+            )
+        # print(await response)
         print(response)
         print(response._hidden_params)
         response_keys = set(dict(response).keys())
         response_keys.discard("_response_ms")
         assert set(["usage", "model", "object", "data"]) == set(
             response_keys
         )  # assert litellm response has expected keys from OpenAI embedding response
+        request_cost = litellm.completion_cost(
+            completion_response=response, call_type="embedding"
+        )
+        print("Calculated request cost=", request_cost)
+        assert isinstance(response.usage, litellm.Usage)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")


 # test_openai_azure_embedding_simple()
+
+import base64
+
+import requests
+
+litellm.set_verbose = True
+url = "https://dummyimage.com/100/100/fff&text=Test+image"
+response = requests.get(url)
+file_data = response.content
+
+encoded_file = base64.b64encode(file_data).decode("utf-8")
+base64_image = f"data:image/png;base64,{encoded_file}"
+
+from openai.types.embedding import Embedding
+
+
+def _azure_ai_image_mock_response(*args, **kwargs):
+    new_response = MagicMock()
+    new_response.headers = {"azureml-model-group": "offer-cohere-embed-multili-paygo"}
+    new_response.json.return_value = {
+        "data": [Embedding(embedding=[1234], index=0, object="embedding")],
+        "model": "",
+        "object": "list",
+        "usage": {"prompt_tokens": 1, "total_tokens": 2},
+    }
+
+    return new_response
+
+
+@pytest.mark.parametrize(
+    "model, api_base, api_key",
+    [
+        (
+            "azure_ai/Cohere-embed-v3-multilingual-jzu",
+            "https://Cohere-embed-v3-multilingual-jzu.eastus2.models.ai.azure.com",
+            os.getenv("AZURE_AI_COHERE_API_KEY_2"),
+        )
+    ],
+)
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
+    try:
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+        input = base64_image
+        if sync_mode:
+            client = HTTPHandler()
+        else:
+            client = AsyncHTTPHandler()
+        with patch.object(
+            client, "post", side_effect=_azure_ai_image_mock_response
+        ) as mock_client:
+            if sync_mode:
+                response = embedding(
+                    model=model,
+                    input=[input],
+                    api_base=api_base,
+                    api_key=api_key,
+                    client=client,
+                )
+            else:
+                response = await litellm.aembedding(
+                    model=model,
+                    input=[input],
+                    api_base=api_base,
+                    api_key=api_key,
+                    client=client,
+                )
+        print(response)
+
+        assert len(response.data) == 1
+
+        print(response._hidden_params)
+        response_keys = set(dict(response).keys())
+        response_keys.discard("_response_ms")
+        assert set(["usage", "model", "object", "data"]) == set(
@@ -128,9 +245,6 @@ def test_openai_azure_embedding_simple():
         pytest.fail(f"Error occurred: {e}")


 # test_openai_azure_embedding_simple()


 def test_openai_azure_embedding_timeouts():
     try:
         response = embedding(
@@ -226,13 +340,16 @@ def test_openai_azure_embedding_with_oidc_and_cf():
         os.environ["AZURE_API_KEY"] = old_key


+from openai.types.embedding import Embedding
+
+
 def _openai_mock_response(*args, **kwargs):
     new_response = MagicMock()
     new_response.headers = {"hello": "world"}
     new_response.parse.return_value = (
         openai.types.create_embedding_response.CreateEmbeddingResponse(
-            data=[],
+            data=[Embedding(embedding=[1234, 45667], index=0, object="embedding")],
             model="azure/test",
             object="list",
             usage=openai.types.create_embedding_response.Usage(
@@ -267,20 +384,28 @@ def test_openai_azure_embedding_optional_arg():
 # test_openai_embedding()


+@pytest.mark.parametrize(
+    "model, api_base",
+    [
+        ("embed-english-v2.0", None),
+    ],
+)
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
-async def test_cohere_embedding(sync_mode):
+async def test_cohere_embedding(sync_mode, model, api_base):
     try:
         # litellm.set_verbose=True
         data = {
-            "model": "embed-english-v2.0",
+            "model": model,
             "input": ["good morning from litellm", "this is another item"],
             "input_type": "search_query",
+            "api_base": api_base,
         }
         if sync_mode:
             response = embedding(**data)
         else:
             response = await litellm.aembedding(**data)

         print(f"response:", response)

         assert isinstance(response.usage, litellm.Usage)