mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)
* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842) * feat(auth_utils.py): enable admin to allow client-side credentials to be passed Makes it easier for devs to experiment with finetuned fireworks ai models * feat(router.py): allow setting configurable_clientside_auth_params for a model Closes https://github.com/BerriAI/litellm/issues/5843 * build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit Fixes https://github.com/BerriAI/litellm/issues/5850 * fix(azure_ai/): support content list for azure ai Fixes https://github.com/BerriAI/litellm/issues/4237 * fix(litellm_logging.py): always set saved_cache_cost Set to 0 by default * fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing handles calling 405b+ size models * fix(slack_alerting.py): fix error alerting for failed spend tracking Fixes regression with slack alerting error monitoring * fix(vertex_and_google_ai_studio_gemini.py): handle gemini no candidates in streaming chunk error * docs(bedrock.md): add llama3-1 models * test: fix tests * fix(azure_ai/chat): fix transformation for azure ai calls * feat(azure_ai/embed): Add azure ai embeddings support Closes https://github.com/BerriAI/litellm/issues/5861 * fix(azure_ai/embed): enable async embedding * feat(azure_ai/embed): support azure ai multimodal embeddings * fix(azure_ai/embed): support async multi modal embeddings * feat(together_ai/embed): support together ai embedding calls * feat(rerank/main.py): log source documents for rerank endpoints to langfuse improves rerank endpoint logging * fix(langfuse.py): support logging `/audio/speech` input to langfuse * test(test_embedding.py): fix test * test(test_completion_cost.py): fix helper util
This commit is contained in:
parent
5bc5eaff8a
commit
16c0307eab
25 changed files with 1675 additions and 340 deletions
|
@ -104,14 +104,131 @@ def test_openai_embedding_3():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_openai_azure_embedding_simple():
|
||||
@pytest.mark.parametrize(
|
||||
"model, api_base, api_key",
|
||||
[
|
||||
# ("azure/azure-embedding-model", None, None),
|
||||
("together_ai/togethercomputer/m2-bert-80M-8k-retrieval", None, None),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_azure_embedding_simple(model, api_base, api_key, sync_mode):
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
response = embedding(
|
||||
model="azure/azure-embedding-model",
|
||||
input=["good morning from litellm"],
|
||||
)
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
# litellm.set_verbose = True
|
||||
if sync_mode:
|
||||
response = embedding(
|
||||
model=model,
|
||||
input=["good morning from litellm"],
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
)
|
||||
else:
|
||||
response = await litellm.aembedding(
|
||||
model=model,
|
||||
input=["good morning from litellm"],
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
)
|
||||
# print(await response)
|
||||
print(response)
|
||||
print(response._hidden_params)
|
||||
response_keys = set(dict(response).keys())
|
||||
response_keys.discard("_response_ms")
|
||||
assert set(["usage", "model", "object", "data"]) == set(
|
||||
response_keys
|
||||
) # assert litellm response has expected keys from OpenAI embedding response
|
||||
|
||||
request_cost = litellm.completion_cost(
|
||||
completion_response=response, call_type="embedding"
|
||||
)
|
||||
|
||||
print("Calculated request cost=", request_cost)
|
||||
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_openai_azure_embedding_simple()
|
||||
# NOTE(review): module-level side effect — this performs a live HTTP request at
# import time to fetch a fixture image. If dummyimage.com is unreachable, the
# entire test module fails at collection; a checked-in fixture file would be
# more robust. TODO confirm before changing, since `base64_image` is consumed
# by the image-embedding tests below.
import base64

import requests

litellm.set_verbose = True
# Small 100x100 placeholder image used as the multimodal-embedding test input.
url = "https://dummyimage.com/100/100/fff&text=Test+image"
response = requests.get(url)
file_data = response.content

encoded_file = base64.b64encode(file_data).decode("utf-8")
# Data-URI form expected by image-embedding endpoints.
# NOTE(review): media type is hard-coded as image/png — presumably fine for a
# mocked backend, but verify if a real endpoint ever validates the bytes.
base64_image = f"data:image/png;base64,{encoded_file}"


# OpenAI embedding item type, used to build mock embedding responses below.
from openai.types.embedding import Embedding
||||
def _azure_ai_image_mock_response(*args, **kwargs):
    """Build a fake HTTP response mimicking an Azure AI multimodal embedding reply.

    Intended as the ``side_effect`` of a patched ``HTTPHandler.post`` /
    ``AsyncHTTPHandler.post`` so the embedding tests run without hitting a
    live Azure AI endpoint. Accepts and ignores whatever arguments the
    patched ``post`` receives.
    """
    mocked = MagicMock()
    # Response header advertising the Cohere multimodal embed offer;
    # presumably litellm keys off this to pick the transformation — TODO confirm.
    mocked.headers = {"azureml-model-group": "offer-cohere-embed-multili-paygo"}

    payload = {
        "data": [Embedding(embedding=[1234], index=0, object="embedding")],
        "model": "",
        "object": "list",
        "usage": {"prompt_tokens": 1, "total_tokens": 2},
    }
    mocked.json.return_value = payload

    return mocked
||||
@pytest.mark.parametrize(
|
||||
"model, api_base, api_key",
|
||||
[
|
||||
(
|
||||
"azure_ai/Cohere-embed-v3-multilingual-jzu",
|
||||
"https://Cohere-embed-v3-multilingual-jzu.eastus2.models.ai.azure.com",
|
||||
os.getenv("AZURE_AI_COHERE_API_KEY_2"),
|
||||
)
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
|
||||
try:
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
input = base64_image
|
||||
if sync_mode:
|
||||
client = HTTPHandler()
|
||||
else:
|
||||
client = AsyncHTTPHandler()
|
||||
with patch.object(
|
||||
client, "post", side_effect=_azure_ai_image_mock_response
|
||||
) as mock_client:
|
||||
if sync_mode:
|
||||
response = embedding(
|
||||
model=model,
|
||||
input=[input],
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
client=client,
|
||||
)
|
||||
else:
|
||||
response = await litellm.aembedding(
|
||||
model=model,
|
||||
input=[input],
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
client=client,
|
||||
)
|
||||
print(response)
|
||||
|
||||
assert len(response.data) == 1
|
||||
|
||||
print(response._hidden_params)
|
||||
response_keys = set(dict(response).keys())
|
||||
response_keys.discard("_response_ms")
|
||||
assert set(["usage", "model", "object", "data"]) == set(
|
||||
|
@ -128,9 +245,6 @@ def test_openai_azure_embedding_simple():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_openai_azure_embedding_simple()
|
||||
|
||||
|
||||
def test_openai_azure_embedding_timeouts():
|
||||
try:
|
||||
response = embedding(
|
||||
|
@ -226,13 +340,16 @@ def test_openai_azure_embedding_with_oidc_and_cf():
|
|||
os.environ["AZURE_API_KEY"] = old_key
|
||||
|
||||
|
||||
from openai.types.embedding import Embedding
|
||||
|
||||
|
||||
def _openai_mock_response(*args, **kwargs):
|
||||
new_response = MagicMock()
|
||||
new_response.headers = {"hello": "world"}
|
||||
|
||||
new_response.parse.return_value = (
|
||||
openai.types.create_embedding_response.CreateEmbeddingResponse(
|
||||
data=[],
|
||||
data=[Embedding(embedding=[1234, 45667], index=0, object="embedding")],
|
||||
model="azure/test",
|
||||
object="list",
|
||||
usage=openai.types.create_embedding_response.Usage(
|
||||
|
@ -267,20 +384,28 @@ def test_openai_azure_embedding_optional_arg():
|
|||
# test_openai_embedding()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model, api_base",
|
||||
[
|
||||
("embed-english-v2.0", None),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embedding(sync_mode):
|
||||
async def test_cohere_embedding(sync_mode, model, api_base):
|
||||
try:
|
||||
# litellm.set_verbose=True
|
||||
data = {
|
||||
"model": "embed-english-v2.0",
|
||||
"model": model,
|
||||
"input": ["good morning from litellm", "this is another item"],
|
||||
"input_type": "search_query",
|
||||
"api_base": api_base,
|
||||
}
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
print(f"response:", response)
|
||||
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue