Mirror of https://github.com/BerriAI/litellm.git
Litellm dev 12 13 2024 p1 (#7219)
* fix(litellm_logging.py): pass user metadata to langsmith on sdk calls
* fix(litellm_logging.py): pass nested user metadata to logging integration - e.g. langsmith
* fix(exception_mapping_utils.py): catch and clarify watsonx `/text/chat` endpoint not supported error message. Closes https://github.com/BerriAI/litellm/issues/7213
* fix(watsonx/common_utils.py): accept new 'WATSONX_IAM_URL' env var - allows user to use local watsonx. Fixes https://github.com/BerriAI/litellm/issues/4991
* fix(litellm_logging.py): cleanup unused function
* test: skip bad ibm test
parent 30e147a315
commit b150faff90
7 changed files with 63 additions and 16 deletions
(watsonx docs)
@@ -31,14 +31,16 @@ from litellm import completion
 os.environ["WATSONX_URL"] = ""
 os.environ["WATSONX_APIKEY"] = ""
 
+## Call WATSONX `/text/chat` endpoint - supports function calling
 response = completion(
-  model="watsonx/ibm/granite-13b-chat-v2",
+  model="watsonx/meta-llama/llama-3-1-8b-instruct",
   messages=[{ "content": "what is your favorite colour?","role": "user"}],
   project_id="<my-project-id>" # or pass with os.environ["WATSONX_PROJECT_ID"]
 )
 
+## Call WATSONX `/text/generation` endpoint - not all models support /chat route.
 response = completion(
-  model="watsonx/meta-llama/llama-3-8b-instruct",
+  model="watsonx/ibm/granite-13b-chat-v2",
   messages=[{ "content": "what is your favorite colour?","role": "user"}],
   project_id="<my-project-id>"
 )
@@ -54,7 +56,7 @@ os.environ["WATSONX_APIKEY"] = ""
 os.environ["WATSONX_PROJECT_ID"] = ""
 
 response = completion(
-  model="watsonx/ibm/granite-13b-chat-v2",
+  model="watsonx/meta-llama/llama-3-1-8b-instruct",
   messages=[{ "content": "what is your favorite colour?","role": "user"}],
   stream=True
 )
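For context, a minimal sketch of the routing the updated docs describe: the `watsonx/` route for models that support `/text/chat`, and the `watsonx_text/` route (the one recommended by the error message added in this commit) for models that only support `/text/generation`. Credential values and the region URL below are placeholders.

import os
from litellm import completion

# Placeholders - substitute real IBM credentials.
os.environ["WATSONX_URL"] = "https://us-south.ml.cloud.ibm.com"
os.environ["WATSONX_APIKEY"] = "my-ibm-api-key"
os.environ["WATSONX_PROJECT_ID"] = "my-project-id"

# Supports /text/chat (and function calling) per the docs diff above.
chat_response = completion(
    model="watsonx/meta-llama/llama-3-1-8b-instruct",
    messages=[{"content": "what is your favorite colour?", "role": "user"}],
)

# granite-13b-chat-v2 has no /chat support; route it through 'watsonx_text'.
text_response = completion(
    model="watsonx_text/ibm/granite-13b-chat-v2",
    messages=[{"content": "what is your favorite colour?", "role": "user"}],
)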
exception_mapping_utils.py:
@@ -656,6 +656,13 @@ def exception_type( # type: ignore # noqa: PLR0915
                 llm_provider=custom_llm_provider,
                 model=model,
             )
+        elif "model_no_support_for_function" in error_str:
+            exception_mapping_worked = True
+            raise BadRequestError(
+                message=f"{custom_llm_provider}Exception - Use 'watsonx_text' route instead. IBM WatsonX does not support `/text/chat` endpoint. - {error_str}",
+                llm_provider=custom_llm_provider,
+                model=model,
+            )
         elif hasattr(original_exception, "status_code"):
             if original_exception.status_code == 500:
                 exception_mapping_worked = True
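Since the new branch raises `litellm.BadRequestError`, a caller can catch it and retry on the recommended route. A sketch of that pattern (the fallback helper is illustrative, not part of the commit):

import litellm
from litellm import completion

def watsonx_completion_with_fallback(model_id: str, messages: list):
    """Try the chat route first; fall back to text-generation when unsupported."""
    try:
        return completion(model=f"watsonx/{model_id}", messages=messages)
    except litellm.BadRequestError as e:
        if "watsonx_text" in str(e):  # error message added in this commit
            return completion(model=f"watsonx_text/{model_id}", messages=messages)
        raise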
litellm_logging.py:
@@ -2584,6 +2584,15 @@ class StandardLoggingPayloadSetup:
         clean_metadata["user_api_key_hash"] = metadata.get(
             "user_api_key"
         )  # this is the hash
+        _potential_requester_metadata = metadata.get(
+            "metadata", None
+        )  # check if user passed metadata in the sdk request - e.g. metadata for langsmith logging - https://docs.litellm.ai/docs/observability/langsmith_integration#set-langsmith-fields
+        if (
+            clean_metadata["requester_metadata"] is None
+            and _potential_requester_metadata is not None
+            and isinstance(_potential_requester_metadata, dict)
+        ):
+            clean_metadata["requester_metadata"] = _potential_requester_metadata
         return clean_metadata
 
     @staticmethod
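The effect of this change from the SDK side, sketched below: `metadata` passed on a `completion()` call is promoted into `requester_metadata` when that field is not already set, which is what lets integrations such as Langsmith see it. Model and key names are placeholders.

from litellm import completion

response = completion(
    model="gpt-3.5-turbo",  # placeholder; any configured model
    messages=[{"role": "user", "content": "hi"}],
    # Lands in clean_metadata["requester_metadata"] via the branch above,
    # and from there flows to logging integrations like Langsmith.
    metadata={"test": "test2"},
)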
watsonx/common_utils.py:
@@ -23,6 +23,12 @@ class WatsonXAIError(BaseLLMException):
 iam_token_cache = InMemoryCache()
 
 
+def get_watsonx_iam_url():
+    return (
+        get_secret_str("WATSONX_IAM_URL") or "https://iam.cloud.ibm.com/identity/token"
+    )
+
+
 def generate_iam_token(api_key=None, **params) -> str:
     result: Optional[str] = iam_token_cache.get_cache(api_key)  # type: ignore
 
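The helper makes the IAM token endpoint overridable via `WATSONX_IAM_URL`, which is what enables the local-watsonx use case in the commit message (Fixes #4991). A sketch, with placeholder on-prem URLs:

import os
from litellm import completion

# Placeholder endpoints for a self-hosted watsonx deployment.
os.environ["WATSONX_IAM_URL"] = "https://my-onprem-iam.example.com/identity/token"
os.environ["WATSONX_URL"] = "https://my-onprem-watsonx.example.com"
os.environ["WATSONX_APIKEY"] = "my-api-key"

# Token acquisition now hits WATSONX_IAM_URL instead of iam.cloud.ibm.com.
response = completion(
    model="watsonx/meta-llama/llama-3-1-8b-instruct",
    messages=[{"content": "hello", "role": "user"}],
    project_id="my-project-id",
)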
@@ -38,15 +44,14 @@ def generate_iam_token(api_key=None, **params) -> str:
         "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
         "apikey": api_key,
     }
+    iam_token_url = get_watsonx_iam_url()
     verbose_logger.debug(
         "calling ibm `/identity/token` to retrieve IAM token.\nURL=%s\nheaders=%s\ndata=%s",
-        "https://iam.cloud.ibm.com/identity/token",
+        iam_token_url,
         headers,
         data,
     )
-    response = httpx.post(
-        "https://iam.cloud.ibm.com/identity/token", data=data, headers=headers
-    )
+    response = httpx.post(iam_token_url, data=data, headers=headers)
     response.raise_for_status()
     json_data = response.json()
 
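For reference, a standalone sketch of the token exchange `generate_iam_token` performs against the now-overridable endpoint. The grant type and payload come from the diff; the headers and the `access_token` response field follow IBM's public IAM API and are assumptions here:

import os
import httpx

iam_url = os.environ.get("WATSONX_IAM_URL", "https://iam.cloud.ibm.com/identity/token")
response = httpx.post(
    iam_url,
    headers={  # standard IBM IAM headers; assumed, not shown in the diff
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json",
    },
    data={
        "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
        "apikey": os.environ["WATSONX_APIKEY"],
    },
)
response.raise_for_status()
token = response.json()["access_token"]  # field name per IBM IAM; assumption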
(proxy config)
@@ -10,3 +10,6 @@ model_list:
       model: "*"
     model_info:
       access_groups: ["default"]
+
+litellm_settings:
+  success_callback: ["langsmith"]
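The `litellm_settings` block above enables Langsmith logging on the proxy; the SDK equivalent is a one-liner, assuming Langsmith credentials (e.g. `LANGSMITH_API_KEY`) are set:

import os
import litellm

os.environ["LANGSMITH_API_KEY"] = "my-langsmith-key"  # placeholder
litellm.success_callback = ["langsmith"]

response = litellm.completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "hi"}],
    metadata={"test": "test2"},  # reaches Langsmith via the logging fix above
)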
(completion tests)
@@ -3977,10 +3977,11 @@ def test_completion_deepseek():
 
 
 @pytest.mark.skip(reason="Account deleted by IBM.")
-def test_completion_watsonx():
+def test_completion_watsonx_error():
     litellm.set_verbose = True
     model_name = "watsonx/ibm/granite-13b-chat-v2"
-    try:
+
+    with pytest.raises(litellm.BadRequestError) as e:
         response = completion(
             model=model_name,
             messages=messages,
@@ -3989,12 +3990,8 @@
         )
         # Add any assertions here to check the response
         print(response)
-    except litellm.APIError as e:
-        pass
-    except litellm.RateLimitError as e:
-        pass
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
+
+    assert "use 'watsonx_text' route instead" in str(e).lower()
 
 
 @pytest.mark.skip(reason="Skip test. account deleted.")
(logging payload tests)
@@ -448,3 +448,27 @@ def test_get_response_time():
 
     # For streaming, should return completion_start_time - start_time
     assert response_time == 2.0
+
+
+@pytest.mark.parametrize(
+    "metadata, expected_requester_metadata",
+    [
+        ({"metadata": {"test": "test2"}}, {"test": "test2"}),
+        ({"metadata": {"test": "test2"}, "model_id": "test-model"}, {"test": "test2"}),
+        (
+            {
+                "metadata": {
+                    "test": "test2",
+                },
+                "model_id": "test-model",
+                "requester_metadata": {"test": "test2"},
+            },
+            {"test": "test2"},
+        ),
+    ],
+)
+def test_standard_logging_metadata_requester_metadata(
+    metadata, expected_requester_metadata
+):
+    result = StandardLoggingPayloadSetup.get_standard_logging_metadata(metadata)
+    assert result["requester_metadata"] == expected_requester_metadata