Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
docs(proxy_server.md): update proxy server docs to include multi-agent autogen tutorial
commit dcb866b353 (parent ed7cf37e68)
9 changed files with 122 additions and 19 deletions
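The proxy_server.md changes themselves are not shown in this view. As a rough, hedged illustration of the kind of multi-agent AutoGen setup such a tutorial might point at a locally running litellm proxy — the endpoint URL, model alias, and config keys below are assumptions, not taken from the docs diff:

from autogen import AssistantAgent, UserProxyAgent

# Assumed local litellm proxy endpoint and model alias -- adjust to your setup.
config_list = [
    {
        "model": "gpt-3.5-turbo",
        "api_base": "http://localhost:8000",  # newer autogen releases use "base_url"
        "api_key": "anything",  # the proxy holds the real provider key
    }
]

assistant = AssistantAgent(name="assistant", llm_config={"config_list": config_list})
user_proxy = UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    code_execution_config=False,
)

# Kick off a short two-agent conversation routed through the proxy
user_proxy.initiate_chat(assistant, message="Summarize what an LLM proxy server does.")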
@@ -141,7 +141,6 @@ def completion(
     litellm_params=None,
     logger_fn=None,
 ):
-    print(f'headers inside hf rest api: {headers}')
     headers = validate_environment(api_key, headers)
     task = get_hf_task_for_model(model)
     print_verbose(f"{model}, {task}")
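For context, validate_environment assembles the request headers for the Hugging Face endpoint before the call is made. A minimal, hedged sketch of what such a helper typically does — the helper name build_hf_headers and the environment-variable lookup are illustrative, not litellm's exact implementation:

import os

def build_hf_headers(api_key=None, headers=None):
    # Illustrative only: resolve the key from the environment and attach a Bearer token.
    api_key = api_key or os.environ.get("HUGGINGFACE_API_KEY")
    headers = dict(headers or {})
    headers.setdefault("content-type", "application/json")
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    return headers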
@@ -254,8 +253,6 @@ def completion(
 
         ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
         is_streamed = False
-        print(f"response keys: {response.__dict__.keys()}")
-        print(f"response keys: {response.__dict__['headers']}")
         if response.__dict__['headers']["Content-Type"] == "text/event-stream":
             is_streamed = True
 
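A standalone sketch of the check this hunk keeps: some hosts (e.g. Baseten) answer with a server-sent-event stream even when streaming was not requested, so the Content-Type header is inspected before the body is parsed as JSON. The endpoint URL below is made up for illustration:

import requests

response = requests.post(
    "https://example.com/generate",  # hypothetical endpoint
    json={"inputs": "Hello"},
)

# Detect an unrequested event stream before trying response.json()
is_streamed = response.headers.get("Content-Type", "") == "text/event-stream"
if is_streamed:
    for line in response.iter_lines():
        print(line)  # consume the SSE chunks line by line
else:
    print(response.json())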
@@ -313,7 +310,7 @@ def completion(
                 sum_logprob = 0
                 for token in completion_response[0]["details"]["tokens"]:
                     sum_logprob += token["logprob"]
-                model_response["choices"][0]["message"]["logprobs"] = sum_logprob
+                model_response["choices"][0]["message"]._logprob = sum_logprob
             if "best_of" in optional_params and optional_params["best_of"] > 1:
                 if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
                     choices_list = []
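The change above stores the summed per-token logprob on the message object (._logprob) instead of under the dict key "logprobs". A small self-contained sketch of the same summation over a text-generation-inference style details payload, with made-up values:

# Made-up response in the text-generation-inference "details" shape
completion_response = [
    {
        "generated_text": "Hello there",
        "details": {
            "tokens": [
                {"text": "Hello", "logprob": -0.12},
                {"text": " there", "logprob": -0.34},
            ]
        },
    }
]

sum_logprob = 0
for token in completion_response[0]["details"]["tokens"]:
    sum_logprob += token["logprob"]

print(sum_logprob)  # roughly -0.46 for this made-up example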
@@ -337,9 +334,14 @@ def completion(
         prompt_tokens = len(
             encoding.encode(input_text)
         ) ##[TODO] use the llama2 tokenizer here
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-        ) ##[TODO] use the llama2 tokenizer here
+        print_verbose(f'output: {model_response["choices"][0]["message"]}')
+        output_text = model_response["choices"][0]["message"].get("content", "")
+        if output_text is not None and len(output_text) > 0:
+            completion_tokens = len(
+                encoding.encode(model_response["choices"][0]["message"].get("content", ""))
+            ) ##[TODO] use the llama2 tokenizer here
+        else:
+            completion_tokens = 0
 
         model_response["created"] = time.time()
         model_response["model"] = model
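The added lines guard the completion-token count against empty output. A hedged sketch of the same guard using tiktoken directly — litellm's encoding object is assumed here to behave like a tiktoken encoding, and the ##[TODO] in the diff notes that a llama2 tokenizer would be more accurate for this model family:

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")  # stand-in for litellm's `encoding`

output_text = ""  # e.g. the model returned no content

if output_text is not None and len(output_text) > 0:
    completion_tokens = len(encoding.encode(output_text))
else:
    completion_tokens = 0  # avoid encoding None/empty content

print(completion_tokens)  # 0 for this example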