async call in separate thread

Xi Yan 2024-10-09 13:18:15 -07:00
parent ae43044a57
commit adb768f827
5 changed files with 72 additions and 55 deletions


@@ -23,14 +23,16 @@ class EvaluationClient(Evals):
     async def shutdown(self) -> None:
         pass

-    async def run_evals(self, model: str, dataset: str, task: str) -> EvaluateResponse:
+    async def run_evals(
+        self, model: str, task: str, dataset: Optional[str] = None
+    ) -> EvaluateResponse:
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 f"{self.base_url}/evals/run",
                 json={
                     "model": model,
-                    "dataset": dataset,
                     "task": task,
+                    "dataset": dataset,
                 },
                 headers={"Content-Type": "application/json"},
                 timeout=3600,
@@ -43,20 +45,19 @@ async def run_main(host: str, port: int):
     client = EvaluationClient(f"http://{host}:{port}")

     # CustomDataset
-    response = await client.run_evals(
-        "Llama3.1-8B-Instruct",
-        "mmlu-simple-eval-en",
-        "mmlu",
-    )
-    cprint(f"evaluate response={response}", "green")
-
-    # Eleuther Eval
     # response = await client.run_evals(
-    #     "Llama3.1-8B-Instruct",
-    #     "PLACEHOLDER_DATASET_NAME",
-    #     "mmlu",
+    #     model="Llama3.1-8B-Instruct",
+    #     dataset="mmlu-simple-eval-en",
+    #     task="mmlu",
     # )
-    # cprint(response.metrics["metrics_table"], "red")
+    # cprint(f"evaluate response={response}", "green")
+
+    # Eleuther Eval Task
+    response = await client.run_evals(
+        model="Llama3.1-8B-Instruct",
+        task="meta_mmlu_pro_instruct",
+    )
+    cprint(response.metrics["metrics_table"], "red")

 def main(host: str, port: int):


@@ -64,8 +64,8 @@ class Evals(Protocol):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse: ...

     @webmethod(route="/evals/jobs")


@@ -28,14 +28,17 @@ class MetaReferenceEvalsImpl(Evals):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse:
         cprint(f"model={model}, dataset={dataset}, task={task}", "red")
+        if not dataset:
+            raise ValueError("dataset must be specified for meta-reference evals")
+
         dataset = DatasetRegistry.get_dataset(dataset)
         dataset.load()
-        task_impl = TaskRegistry.get_task(task)(dataset)
+        task_impl = TaskRegistry.get_task(task)(dataset)
         x1 = task_impl.preprocess()

         # TODO: replace w/ batch inference & async return eval job


@@ -91,9 +91,10 @@ class MetaReferenceInferenceImpl(Inference):
         else:
             return self._nonstream_chat_completion(request)

-    def _nonstream_chat_completion(
+    async def _nonstream_chat_completion(
         self, request: ChatCompletionRequest
     ) -> ChatCompletionResponse:
+        async with SEMAPHORE:
             messages = chat_completion_request_to_messages(request)

             tokens = []
@@ -120,14 +121,18 @@ class MetaReferenceInferenceImpl(Inference):
                 logprobs.append(
                     TokenLogProbs(
-                        logprobs_by_token={token_result.text: token_result.logprobs[0]}
+                        logprobs_by_token={
+                            token_result.text: token_result.logprobs[0]
+                        }
                     )
                 )

             if stop_reason is None:
                 stop_reason = StopReason.out_of_tokens

-            message = self.generator.formatter.decode_assistant_message(tokens, stop_reason)
+            message = self.generator.formatter.decode_assistant_message(
+                tokens, stop_reason
+            )
             return ChatCompletionResponse(
                 completion_message=message,
                 logprobs=logprobs if request.logprobs else None,
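
Aside: the new "async with SEMAPHORE:" guard is what lets _nonstream_chat_completion become a coroutine (keeping the event loop responsive) while generation on the single local model stays serialized. A minimal, self-contained sketch of that pattern, with a sleep standing in for the real token loop (the limit of 1 and all function names here are illustrative assumptions, not this provider's actual implementation):

    import asyncio

    SEMAPHORE = asyncio.Semaphore(1)  # assumed limit: one generation at a time

    async def nonstream_completion(prompt: str) -> str:
        # Queue behind any in-flight generation; the GPU-bound generator
        # must not be driven by two coroutines at once.
        async with SEMAPHORE:
            await asyncio.sleep(0.1)  # placeholder for the token-by-token loop
            return f"completion for: {prompt}"

    async def main() -> None:
        # Both requests are accepted concurrently; generation runs one at a time.
        print(await asyncio.gather(
            nonstream_completion("hello"),
            nonstream_completion("world"),
        ))

    asyncio.run(main())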


@@ -4,10 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.evals import *  # noqa: F403
 import os
 import random
+import threading
 from pathlib import Path

 import lm_eval
@@ -19,6 +21,12 @@ from termcolor import cprint

 from .config import EleutherEvalsImplConfig  # noqa

+# https://stackoverflow.com/questions/74703727/how-to-call-async-function-from-sync-funcion-and-get-result-while-a-loop-is-alr
+# We will use another thread with its own event loop to run the async API within a sync function.
+_loop = asyncio.new_event_loop()
+_thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)
+
+
 class EleutherEvalsWrapper(LM):
     def __init__(
         self,
@@ -89,8 +97,10 @@ class EleutherEvalsWrapper(LM):
     def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
         res = []
+        if not _thr.is_alive():
+            _thr.start()
         for req in requests:
-            response = self.inference_api.chat_completion(
+            chat_completion_coro_fn = self.inference_api.chat_completion(
                 model=self.model,
                 messages=[
                     {
@@ -100,7 +110,8 @@ class EleutherEvalsWrapper(LM):
                 ],
                 stream=False,
             )
-            print(response)
+            future = asyncio.run_coroutine_threadsafe(chat_completion_coro_fn, _loop)
+            response = future.result()
             res.append(response.completion_message.content)

         return res
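
This is the change the commit title refers to. lm_eval drives generate_until synchronously, but inference_api.chat_completion is a coroutine, and the calling thread may already be inside a running event loop (run_evals itself is async), so asyncio.run or run_until_complete would raise there. Instead, a daemon thread owns a second event loop, and asyncio.run_coroutine_threadsafe submits each coroutine to that loop and blocks on the returned concurrent.futures.Future. A self-contained sketch of the same bridge (fake_chat_completion is a stand-in for the real inference call):

    import asyncio
    import threading

    # One background loop for the whole module, started lazily on first use,
    # mirroring _loop/_thr in the diff.
    _loop = asyncio.new_event_loop()
    _thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)

    async def fake_chat_completion(prompt: str) -> str:
        await asyncio.sleep(0.1)  # stand-in for the awaitable inference API
        return prompt.upper()

    def generate_until(prompts: list[str]) -> list[str]:
        # Synchronous entry point, as lm_eval expects.
        if not _thr.is_alive():
            _thr.start()
        res = []
        for p in prompts:
            # Hand the coroutine to the background loop; block until it finishes.
            future = asyncio.run_coroutine_threadsafe(fake_chat_completion(p), _loop)
            res.append(future.result())
        return res

    print(generate_until(["hello", "world"]))  # -> ['HELLO', 'WORLD']

Note that each future.result() blocks until that one completion returns, so requests are processed strictly one at a time; throughput stays bounded by sequential chat completions, which is likely what the batch-inference TODO elsewhere in this diff is pointing at.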
@@ -119,16 +130,13 @@ class EleutherEvalsAdapter(Evals):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse:
-        eluther_wrapper = EleutherEvalsWrapper(self.inference_api, model)
-        cprint(f"Eleuther Evals: {model} {dataset} {task}", "red")
-        task = "meta_mmlu_pro_instruct"
+        eluther_wrapper = EleutherEvalsWrapper(self.inference_api, model)
         current_dir = Path(os.path.dirname(os.path.abspath(__file__)))
-        print(current_dir)

         task_manager = TaskManager(
             include_path=str(current_dir / "tasks"),
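
The last hunk is truncated mid-call, but for orientation: in lm-eval 0.4.x, a TaskManager pointed at a custom include_path is how out-of-tree task YAMLs (here, the tasks/ directory next to the adapter, holding meta_mmlu_pro_instruct) get registered, and it is then passed to lm_eval.simple_evaluate alongside the LM wrapper. A hedged sketch of where the truncated code is presumably headed (the simple_evaluate call is an assumption based on the upstream lm-eval API, not part of this diff):

    import os
    from pathlib import Path

    import lm_eval
    from lm_eval.tasks import TaskManager

    def evaluate(wrapper, task: str = "meta_mmlu_pro_instruct") -> dict:
        # Register the adapter's custom task directory, then let lm_eval
        # resolve the task name and drive wrapper.generate_until().
        current_dir = Path(os.path.dirname(os.path.abspath(__file__)))
        task_manager = TaskManager(include_path=str(current_dir / "tasks"))
        return lm_eval.simple_evaluate(
            model=wrapper,  # an lm_eval LM subclass, e.g. EleutherEvalsWrapper
            tasks=[task],
            task_manager=task_manager,
        )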