commit db55dac434
coconut49 · 2023-10-18 01:47:56 +08:00
8 changed files with 106 additions and 18 deletions


@@ -1,5 +1,5 @@
# Local Debugging
-There's 2 ways to do local debugging - `litellm.set_verbose=True` and by passing in a custom function `completion(...logger_fn=<your_local_function>)`
There's 2 ways to do local debugging - `litellm.set_verbose=True` and by passing in a custom function `completion(...logger_fn=<your_local_function>)`. Warning: Make sure to not use `set_verbose` in production. It logs API keys, which might end up in log files.
## Set Verbose
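For reference, the second option mentioned in that doc passes a plain callable that litellm invokes with the details of each model call; a minimal sketch, where the model name and the printed payload shape are assumptions rather than part of this commit:

```python
from litellm import completion

def my_custom_logger(model_call_dict):
    # litellm calls this with a dict describing the request/response;
    # print it locally instead of enabling set_verbose in production.
    print("MODEL CALL:", model_call_dict)

# Assumes OPENAI_API_KEY is set in the environment; the model is a placeholder.
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    logger_fn=my_custom_logger,
)
```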


@@ -208,6 +208,86 @@ user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stoc
Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial.
</TabItem>
<TabItem value="multi-LLM AutoGen" label="AutoGen Multi-LLM">
```python
from autogen import AssistantAgent, GroupChatManager, UserProxyAgent
from autogen.agentchat import GroupChat
config_list = [
{
"model": "ollama/mistralorca",
"api_base": "http://localhost:8000", # litellm compatible endpoint
"api_type": "open_ai",
"api_key": "NULL", # just a placeholder
}
]
llm_config = {"config_list": config_list, "seed": 42}
code_config_list = [
{
"model": "ollama/phind-code",
"api_base": "http://localhost:8000", # litellm compatible endpoint
"api_type": "open_ai",
"api_key": "NULL", # just a placeholder
}
]
code_config = {"config_list": code_config_list, "seed": 42}
admin = UserProxyAgent(
name="Admin",
system_message="A human admin. Interact with the planner to discuss the plan. Plan execution needs to be approved by this admin.",
llm_config=llm_config,
code_execution_config=False,
)
engineer = AssistantAgent(
name="Engineer",
llm_config=code_config,
system_message="""Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.
Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
""",
)
planner = AssistantAgent(
name="Planner",
system_message="""Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.
The plan may involve an engineer who can write code and a scientist who doesn't write code.
Explain the plan first. Be clear which step is performed by an engineer, and which step is performed by a scientist.
""",
llm_config=llm_config,
)
executor = UserProxyAgent(
name="Executor",
system_message="Executor. Execute the code written by the engineer and report the result.",
human_input_mode="NEVER",
llm_config=llm_config,
code_execution_config={"last_n_messages": 3, "work_dir": "paper"},
)
critic = AssistantAgent(
name="Critic",
system_message="Critic. Double check plan, claims, code from other agents and provide feedback. Check whether the plan includes adding verifiable info such as source URL.",
llm_config=llm_config,
)
groupchat = GroupChat(
agents=[admin, engineer, planner, executor, critic],
messages=[],
max_round=50,
)
manager = GroupChatManager(groupchat=groupchat, llm_config=llm_config)
admin.initiate_chat(
manager,
message="""
""",
)
```
Credits [@Nathan](https://gist.github.com/CUexter) for this tutorial.
</TabItem>
<TabItem value="langroid" label="Langroid"> <TabItem value="langroid" label="Langroid">
```python ```python


@@ -121,7 +121,7 @@ def completion(
sum_logprob = 0
for token in completion_response[0]["details"]["tokens"]:
sum_logprob += token["logprob"]
-model_response["choices"][0]["message"]["logprobs"] = sum_logprob
model_response["choices"][0]["message"]._logprobs = sum_logprob
else:
raise BasetenError(
message=f"Unable to parse response. Original response: {response.text}",


@@ -141,7 +141,6 @@ def completion(
litellm_params=None,
logger_fn=None,
):
-print(f'headers inside hf rest api: {headers}')
headers = validate_environment(api_key, headers)
task = get_hf_task_for_model(model)
print_verbose(f"{model}, {task}")
@@ -254,8 +253,6 @@ def completion(
## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
is_streamed = False
-print(f"response keys: {response.__dict__.keys()}")
-print(f"response keys: {response.__dict__['headers']}")
if response.__dict__['headers']["Content-Type"] == "text/event-stream":
is_streamed = True
@@ -313,7 +310,7 @@ def completion(
sum_logprob = 0
for token in completion_response[0]["details"]["tokens"]:
sum_logprob += token["logprob"]
-model_response["choices"][0]["message"]["logprobs"] = sum_logprob
model_response["choices"][0]["message"]._logprob = sum_logprob
if "best_of" in optional_params and optional_params["best_of"] > 1:
if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
choices_list = []
@@ -337,9 +334,14 @@ def completion(
prompt_tokens = len(
encoding.encode(input_text)
) ##[TODO] use the llama2 tokenizer here
-completion_tokens = len(
-encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-) ##[TODO] use the llama2 tokenizer here
print_verbose(f'output: {model_response["choices"][0]["message"]}')
output_text = model_response["choices"][0]["message"].get("content", "")
if output_text is not None and len(output_text) > 0:
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
) ##[TODO] use the llama2 tokenizer here
else:
completion_tokens = 0
model_response["created"] = time.time() model_response["created"] = time.time()
model_response["model"] = model model_response["model"] = model


@@ -729,7 +729,6 @@ def completion(
headers
or litellm.headers
)
-print(f'headers before hf rest api: {hf_headers}')
model_response = huggingface_restapi.completion(
model=model,
messages=messages,


@@ -9,6 +9,7 @@ import backoff
import openai.error
import litellm
from litellm.utils import trim_messages
import litellm.exceptions
cost_dict: Dict[str, Dict[str, float]] = defaultdict(dict)
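`trim_messages` is imported here so oversized conversations can be shrunk to fit a model's context window before the completion call; a minimal sketch of typical usage, where the model name is a placeholder and the keyword arguments are an assumption based on litellm's utils at the time:

```python
from litellm import completion
from litellm.utils import trim_messages

long_messages = [{"role": "user", "content": "some very long prompt ... " * 2000}]

# Trim the conversation so it fits the target model's context window.
trimmed = trim_messages(long_messages, model="gpt-3.5-turbo")

response = completion(model="gpt-3.5-turbo", messages=trimmed)
```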


@@ -2,6 +2,7 @@ import sys, os, platform, time, copy
import threading
import shutil, random, traceback
messages = []
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path - for litellm local dev
@@ -72,6 +73,7 @@ print()
import litellm
from fastapi import FastAPI, Request
from fastapi.routing import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import json
@@ -111,6 +113,13 @@ def print_verbose(print_statement):
print(print_statement)
def find_avatar_url(role):
role = role.replace(" ", "%20")
avatar_filename = f"avatars/{role}.png"
avatar_url = f"/static/{avatar_filename}"
return avatar_url
def usage_telemetry(
feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
if user_telemetry:
@@ -461,24 +470,21 @@ def model_list():
)
@router.post("/v1/completions")
@router.post("/completions") @router.post("/completions")
async def completion(request: Request): async def completion(request: Request):
data = await request.json() data = await request.json()
print_verbose(f"data passed in: {data}")
return litellm_completion(data=data, type="completion", user_model=user_model, user_temperature=user_temperature, return litellm_completion(data=data, type="completion", user_model=user_model, user_temperature=user_temperature,
user_max_tokens=user_max_tokens, user_api_base=user_api_base, user_headers=user_headers, user_max_tokens=user_max_tokens, user_api_base=user_api_base, user_headers=user_headers,
user_debug=user_debug) user_debug=user_debug)
@router.post("/v1/chat/completions")
@router.post("/chat/completions") @router.post("/chat/completions")
async def chat_completion(request: Request): async def chat_completion(request: Request):
data = await request.json() data = await request.json()
print_verbose(f"data passed in: {data}") print_verbose(f"data passed in: {data}")
return litellm_completion(data, type="chat_completion", user_model=user_model, return litellm_completion(data, type="chat_completion", user_model=user_model,
user_temperature=user_temperature, user_max_tokens=user_max_tokens, user_temperature=user_temperature, user_max_tokens=user_max_tokens,
user_api_base=user_api_base, user_headers=user_headers, user_debug=user_debug) user_api_base=user_api_base, user_headers=user_headers, user_debug=user_debug)
def print_cost_logs(): def print_cost_logs():
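With the new `/v1/completions` and `/v1/chat/completions` aliases, the proxy matches the path layout the OpenAI client builds by default; a minimal sketch of calling it with the pre-1.0 `openai` package (the port, model name, and prompt are placeholders, not part of this commit):

```python
import openai

openai.api_key = "anything"                   # the local proxy does not check this key
openai.api_base = "http://localhost:8000/v1"  # requests now resolve to /v1/chat/completions

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello from the proxy"}],
)
print(response["choices"][0]["message"]["content"])
```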


@@ -119,7 +119,7 @@ class Message(OpenAIObject):
super(Message, self).__init__(**params)
self.content = content
self.role = role
-self.logprobs = logprobs
self._logprobs = logprobs
class Delta(OpenAIObject):
def __init__(self, content=None, logprobs=None, role=None, **params):
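Because `logprobs` is now stored as a private attribute, the summed value no longer appears as a key in the serialized OpenAI-style response but can still be read off the `Message` object; a small sketch, assuming a provider path that populates it (such as the Hugging Face or Baseten branches above):

```python
from litellm import completion

# Hypothetical call to a provider whose response includes per-token logprobs;
# the model name is a placeholder and may require provider credentials.
response = completion(
    model="huggingface/bigcode/starcoder",
    messages=[{"role": "user", "content": "def hello_world():"}],
)

message = response["choices"][0]["message"]
print(message.get("content", ""))
print(getattr(message, "_logprobs", None))  # summed token logprobs, or None if not set
```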