mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00

Commit adb768f827 (parent ae43044a57): async call in separate thread
5 changed files with 72 additions and 55 deletions
@@ -23,14 +23,16 @@ class EvaluationClient(Evals):
     async def shutdown(self) -> None:
         pass
 
-    async def run_evals(self, model: str, dataset: str, task: str) -> EvaluateResponse:
+    async def run_evals(
+        self, model: str, task: str, dataset: Optional[str] = None
+    ) -> EvaluateResponse:
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 f"{self.base_url}/evals/run",
                 json={
                     "model": model,
-                    "dataset": dataset,
                     "task": task,
+                    "dataset": dataset,
                 },
                 headers={"Content-Type": "application/json"},
                 timeout=3600,
@@ -43,20 +45,19 @@ async def run_main(host: str, port: int):
     client = EvaluationClient(f"http://{host}:{port}")
 
     # CustomDataset
-    response = await client.run_evals(
-        "Llama3.1-8B-Instruct",
-        "mmlu-simple-eval-en",
-        "mmlu",
-    )
-    cprint(f"evaluate response={response}", "green")
-
-    # Eleuther Eval
     # response = await client.run_evals(
-    #     "Llama3.1-8B-Instruct",
-    #     "PLACEHOLDER_DATASET_NAME",
-    #     "mmlu",
+    #     model="Llama3.1-8B-Instruct",
+    #     dataset="mmlu-simple-eval-en",
+    #     task="mmlu",
     # )
-    # cprint(response.metrics["metrics_table"], "red")
     # cprint(f"evaluate response={response}", "green")
 
+    # Eleuther Eval Task
+    response = await client.run_evals(
+        model="Llama3.1-8B-Instruct",
+        task="meta_mmlu_pro_instruct",
+    )
+    cprint(response.metrics["metrics_table"], "red")
 
 
 def main(host: str, port: int):
@@ -64,8 +64,8 @@ class Evals(Protocol):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse: ...
 
     @webmethod(route="/evals/jobs")
@@ -28,14 +28,17 @@ class MetaReferenceEvalsImpl(Evals):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse:
         cprint(f"model={model}, dataset={dataset}, task={task}", "red")
+        if not dataset:
+            raise ValueError("dataset must be specified for mete-reference evals")
+
         dataset = DatasetRegistry.get_dataset(dataset)
         dataset.load()
-        task_impl = TaskRegistry.get_task(task)(dataset)
 
+        task_impl = TaskRegistry.get_task(task)(dataset)
         x1 = task_impl.preprocess()
 
         # TODO: replace w/ batch inference & async return eval job
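The hunk above relies on DatasetRegistry and TaskRegistry, which are not part of this diff. A minimal sketch of the registry pattern those calls assume (names and storage are illustrative, not the project's actual implementation):

```python
# Illustrative sketch only: llama-stack's real DatasetRegistry / TaskRegistry are
# not shown in this diff and may differ. This models the lookups used above.
from typing import Any, Callable, Dict

_DATASETS: Dict[str, Any] = {}
_TASKS: Dict[str, Callable[..., Any]] = {}


def register_dataset(name: str, dataset: Any) -> None:
    _DATASETS[name] = dataset


def get_dataset(name: str) -> Any:
    # Counterpart of DatasetRegistry.get_dataset(dataset): resolve a dataset by name.
    return _DATASETS[name]


def get_task(name: str) -> Callable[..., Any]:
    # Counterpart of TaskRegistry.get_task(task): returns a task class that is then
    # instantiated with the dataset, as in TaskRegistry.get_task(task)(dataset).
    return _TASKS[name]
```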
@@ -91,9 +91,10 @@ class MetaReferenceInferenceImpl(Inference):
         else:
             return self._nonstream_chat_completion(request)
 
-    def _nonstream_chat_completion(
+    async def _nonstream_chat_completion(
         self, request: ChatCompletionRequest
     ) -> ChatCompletionResponse:
+        async with SEMAPHORE:
         messages = chat_completion_request_to_messages(request)
 
         tokens = []
@@ -120,14 +121,18 @@ class MetaReferenceInferenceImpl(Inference):
 
                 logprobs.append(
                     TokenLogProbs(
-                        logprobs_by_token={token_result.text: token_result.logprobs[0]}
+                        logprobs_by_token={
+                            token_result.text: token_result.logprobs[0]
+                        }
                     )
                 )
 
             if stop_reason is None:
                 stop_reason = StopReason.out_of_tokens
 
-            message = self.generator.formatter.decode_assistant_message(tokens, stop_reason)
+            message = self.generator.formatter.decode_assistant_message(
+                tokens, stop_reason
+            )
             return ChatCompletionResponse(
                 completion_message=message,
                 logprobs=logprobs if request.logprobs else None,
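SEMAPHORE is referenced in the inference hunks above but not defined in this diff. A minimal sketch of the guard, assuming a module-level asyncio.Semaphore (the limit of 1 is illustrative, not taken from the diff):

```python
# Sketch of the concurrency guard, under the assumption that SEMAPHORE is a
# module-level asyncio.Semaphore; the limit below is illustrative.
import asyncio

SEMAPHORE = asyncio.Semaphore(1)


async def _nonstream_chat_completion_sketch(request: str) -> str:
    async with SEMAPHORE:
        # Only one generation runs at a time; concurrent callers wait here instead
        # of contending for the single in-process generator.
        await asyncio.sleep(0)  # placeholder for the token-generation loop
        return f"completed: {request}"
```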
@@ -4,10 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import asyncio
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.evals import *  # noqa: F403
 import os
 import random
+import threading
 from pathlib import Path
 
 import lm_eval
@@ -19,6 +21,12 @@ from termcolor import cprint
 from .config import EleutherEvalsImplConfig  # noqa
 
 
+# https://stackoverflow.com/questions/74703727/how-to-call-async-function-from-sync-funcion-and-get-result-while-a-loop-is-alr
+# We will use another thread wih its own event loop to run the async api within sync function
+_loop = asyncio.new_event_loop()
+_thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)
+
+
 class EleutherEvalsWrapper(LM):
     def __init__(
         self,
@@ -89,8 +97,10 @@ class EleutherEvalsWrapper(LM):
 
     def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
         res = []
+        if not _thr.is_alive():
+            _thr.start()
         for req in requests:
-            response = self.inference_api.chat_completion(
+            chat_completion_coro_fn = self.inference_api.chat_completion(
                 model=self.model,
                 messages=[
                     {
@@ -100,7 +110,8 @@ class EleutherEvalsWrapper(LM):
                 ],
                 stream=False,
             )
-            print(response)
+            future = asyncio.run_coroutine_threadsafe(chat_completion_coro_fn, _loop)
+            response = future.result()
             res.append(response.completion_message.content)
 
         return res
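This is the core of the commit: lm_eval's LM interface is synchronous, so generate_until schedules the async chat_completion coroutine on a dedicated event loop running in a daemon thread and blocks on the result. A self-contained sketch of the same pattern, with a stand-in coroutine in place of the inference API:

```python
import asyncio
import threading

# A dedicated event loop that runs forever in a daemon thread.
_loop = asyncio.new_event_loop()
_thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)


async def async_api(prompt: str) -> str:
    # Stand-in for an async call such as inference_api.chat_completion(...).
    await asyncio.sleep(0.01)
    return f"echo: {prompt}"


def sync_caller(prompt: str) -> str:
    # Called from synchronous code (e.g. lm_eval's generate_until): submit the
    # coroutine to the background loop and block until it completes.
    if not _thr.is_alive():
        _thr.start()
    future = asyncio.run_coroutine_threadsafe(async_api(prompt), _loop)
    return future.result()


if __name__ == "__main__":
    print(sync_caller("hello"))
```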
@@ -119,16 +130,13 @@ class EleutherEvalsAdapter(Evals):
     async def run_evals(
         self,
         model: str,
-        dataset: str,
         task: str,
+        dataset: Optional[str] = None,
     ) -> EvaluateResponse:
-        eluther_wrapper = EleutherEvalsWrapper(self.inference_api, model)
-
         cprint(f"Eleuther Evals: {model} {dataset} {task}", "red")
-
-        task = "meta_mmlu_pro_instruct"
+        eluther_wrapper = EleutherEvalsWrapper(self.inference_api, model)
         current_dir = Path(os.path.dirname(os.path.abspath(__file__)))
         print(current_dir)
 
         task_manager = TaskManager(
             include_path=str(current_dir / "tasks"),
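The hunk is cut off before the evaluation itself runs. A hedged sketch of how an LM wrapper plus a TaskManager pointed at a custom task directory are typically handed to lm-eval-harness; the simple_evaluate call below is an assumption about the public lm_eval API, not part of this diff:

```python
# Hedged sketch: wiring a custom LM wrapper and a TaskManager with an extra task
# directory into lm-eval-harness. The simple_evaluate call is assumed from the
# public lm_eval API; the actual call made by the adapter is not shown in this diff.
import lm_eval
from lm_eval.tasks import TaskManager


def run_task(wrapper, task_dir: str, task_name: str):
    # Pick up custom task definitions (e.g. the "tasks" directory next to the adapter).
    task_manager = TaskManager(include_path=task_dir)
    results = lm_eval.simple_evaluate(
        model=wrapper,          # an lm_eval LM instance, e.g. EleutherEvalsWrapper
        tasks=[task_name],
        task_manager=task_manager,
    )
    return results
```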