(Refactor) Code Quality improvement - use Common base handler for Cohere (#7117)

* fix: use new format for Cohere config

* fix base llm http handler

* Litellm code qa common config (#7116)

* feat(base_llm): initial commit for common base config class

Addresses code qa critique https://github.com/andrewyng/aisuite/issues/113#issuecomment-2512369132

* feat(base_llm/): add transform request/response abstract methods to base config class (see the base-config sketch below)

---------

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>

* use base transform helpers

* use base_llm_http_handler for cohere (see the handler sketch below)

* working cohere using base llm handler

* add async cohere chat completion support on base handler

* fix completion code

* working sync cohere stream

* add async support cohere_chat

* fix types for get_model_response_iterator (see the streaming sketch below)

* add async / sync tests for cohere

* feat: cohere using base llm class

* fix linting errors

* fix _abc error

* add cohere params to transformation (see the param-mapping sketch below)

* remove old cohere file

* fix type error

* fix merge conflicts

* fix cohere merge conflicts

* fix linting error

* fix litellm.llms.custom_httpx.http_handler.HTTPHandler.post

* fix passing cohere specific params

---------

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
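
The #7116 commits above introduce a common base config class with abstract request/response transforms. A minimal sketch of that shape; the class and method names here (BaseLLMConfig, CohereChatConfig) are illustrative assumptions, not LiteLLM's exact API:

from abc import ABC, abstractmethod
from typing import Any, Dict, List


class BaseLLMConfig(ABC):
    """Shared contract every provider config implements (hypothetical names)."""

    @abstractmethod
    def transform_request(
        self, model: str, messages: List[Dict[str, Any]], optional_params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Map OpenAI-style inputs to the provider's request body."""
        ...

    @abstractmethod
    def transform_response(self, raw_response: Dict[str, Any]) -> Dict[str, Any]:
        """Map the provider's raw response back to the OpenAI format."""
        ...


class CohereChatConfig(BaseLLMConfig):
    def transform_request(self, model, messages, optional_params):
        # Cohere's v1 chat API takes the latest turn as "message"
        # and earlier turns as "chat_history" (simplified here).
        return {
            "model": model,
            "message": messages[-1]["content"],
            "chat_history": messages[:-1],
            **optional_params,
        }

    def transform_response(self, raw_response):
        # Cohere returns the generated text under "text".
        return {
            "choices": [
                {"message": {"role": "assistant", "content": raw_response.get("text", "")}}
            ]
        }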
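"use base_llm_http_handler for cohere" then means both sync and async completions flow through one handler that delegates to whichever config it is given. A rough sketch under the same assumptions (this is not the real BaseLLMHTTPHandler interface):

import httpx


class BaseLLMHTTPHandler:
    """One HTTP path for every provider; the config owns the translation (hypothetical)."""

    def completion(self, config, url, model, messages, optional_params, headers):
        data = config.transform_request(model, messages, optional_params)
        with httpx.Client() as client:
            resp = client.post(url, json=data, headers=headers)
        resp.raise_for_status()
        return config.transform_response(resp.json())

    async def acompletion(self, config, url, model, messages, optional_params, headers):
        # Same flow as completion(), but on httpx's async client.
        data = config.transform_request(model, messages, optional_params)
        async with httpx.AsyncClient() as client:
            resp = await client.post(url, json=data, headers=headers)
        resp.raise_for_status()
        return config.transform_response(resp.json())

The payoff of this split is that adding a provider no longer means writing another HTTP client: only the two transform methods differ per provider.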
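For the streaming commits ("working sync cohere stream", "fix types for get_model_response_iterator"), the handler needs an iterator that turns provider stream chunks into OpenAI-style deltas. A hypothetical sketch, assuming Cohere v1's newline-delimited JSON chunks with "event_type"/"text" fields:

import json
from typing import AsyncIterator, Iterator


def get_model_response_iterator(lines: Iterator[str]) -> Iterator[dict]:
    # Sync variant: consume raw stream lines, yield OpenAI-style delta chunks.
    for line in lines:
        if not line.strip():
            continue
        chunk = json.loads(line)
        if chunk.get("event_type") == "text-generation":
            yield {"choices": [{"delta": {"content": chunk.get("text", "")}}]}


async def async_model_response_iterator(lines: AsyncIterator[str]) -> AsyncIterator[dict]:
    # Async variant: identical translation over an async line source.
    async for line in lines:
        if not line.strip():
            continue
        chunk = json.loads(line)
        if chunk.get("event_type") == "text-generation":
            yield {"choices": [{"delta": {"content": chunk.get("text", "")}}]}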
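Finally, "add cohere params to transformation" / "fix passing cohere specific params" amounts to mapping OpenAI parameter names onto Cohere's. An illustrative, deliberately non-exhaustive mapping:

def map_openai_params(non_default_params: dict) -> dict:
    # OpenAI name -> Cohere chat name; e.g. OpenAI's top_p is Cohere's "p",
    # and stop becomes "stop_sequences". Illustrative subset only.
    mapping = {
        "max_tokens": "max_tokens",
        "temperature": "temperature",
        "top_p": "p",
        "frequency_penalty": "frequency_penalty",
        "presence_penalty": "presence_penalty",
        "stop": "stop_sequences",
        "seed": "seed",
    }
    return {
        cohere_key: non_default_params[openai_key]
        for openai_key, cohere_key in mapping.items()
        if openai_key in non_default_params
    }

Cohere-only options such as force_single_step (used in the tool-calling test below) would pass through alongside the mapped values, which is presumably what the "passing cohere specific params" fix covers.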
Ishaan Jaff 2024-12-09 17:45:29 -08:00 committed by GitHub
parent 5bbf906c83
commit ff7c95694d
14 changed files with 933 additions and 720 deletions


@@ -695,79 +695,6 @@ async def test_anthropic_no_content_error():
        pytest.fail(f"An unexpected error occurred - {str(e)}")


def test_completion_cohere_command_r_plus_function_call():
    litellm.set_verbose = True
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        # Add any assertions, here to check response args
        print(response)
        assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
        assert isinstance(
            response.choices[0].message.tool_calls[0].function.arguments, str
        )
        messages.append(
            response.choices[0].message.model_dump()
        )  # Add assistant tool invokes
        tool_result = (
            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
        )
        # Add user submitted tool results in the OpenAI format
        messages.append(
            {
                "tool_call_id": response.choices[0].message.tool_calls[0].id,
                "role": "tool",
                "name": response.choices[0].message.tool_calls[0].function.name,
                "content": tool_result,
            }
        )
        # In the second response, Cohere should deduce answer from tool results
        second_response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            force_single_step=True,
        )
        print(second_response)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_parse_xml_params():
    from litellm.llms.prompt_templates.factory import parse_xml_params
@@ -2120,27 +2047,6 @@ def test_ollama_image():
# hf_test_error_logs()


# def test_completion_cohere():  # commenting out, for now as the cohere endpoint is being flaky
#     try:
#         litellm.CohereConfig(max_tokens=10, stop_sequences=["a"])
#         response = completion(
#             model="command-nightly", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#         response_str = response["choices"][0]["message"]["content"]
#         response_str_2 = response.choices[0].message.content
#         if type(response_str) != str:
#             pytest.fail(f"Error occurred: {e}")
#         if type(response_str_2) != str:
#             pytest.fail(f"Error occurred: {e}")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_cohere()


def test_completion_openai():
    try:
        litellm.set_verbose = True
@@ -3550,9 +3456,6 @@ def test_completion_bedrock_titan_null_response():
# test_completion_bedrock_claude()
# test_completion_bedrock_cohere()


# def test_completion_bedrock_claude_stream():
#     print("calling claude")
#     litellm.set_verbose = False
@@ -3722,78 +3625,6 @@ def test_completion_anyscale_api():
# test_completion_anyscale_api()


# @pytest.mark.skip(reason="flaky test, times out frequently")
@pytest.mark.flaky(retries=6, delay=1)
def test_completion_cohere():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
            {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        response = completion(
            model="command-r",
            messages=messages,
            extra_headers={"Helicone-Property-Locale": "ko"},
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# FYI - cohere_chat looks quite unstable, even when testing locally
def test_chat_completion_cohere():
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        response = completion(
            model="cohere_chat/command-r",
            messages=messages,
            max_tokens=10,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_chat_completion_cohere_stream():
    try:
        litellm.set_verbose = False
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        response = completion(
            model="cohere_chat/command-r",
            messages=messages,
            max_tokens=10,
            stream=True,
        )
        print(response)
        for chunk in response:
            print(chunk)
    except litellm.APIConnectionError as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_azure_cloudflare_api():
    litellm.set_verbose = True
    try: