mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 19:04:19 +00:00
fix: Call pandas.read_* in a separate thread (#1698)
These block on I/O reads, which in turn block the server. Move them to their own thread. Closes: #1697 # What does this PR do? To avoid blocking the main event loop, updates datasetio/localfs to load data in a separate thread. Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
parent
65ca85ba6b
commit
6949bd1999
2 changed files with 11 additions and 7 deletions
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
from urllib.parse import unquote
|
||||
|
@ -13,12 +14,15 @@ import pandas
|
|||
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
||||
|
||||
|
||||
def get_dataframe_from_uri(uri: str):
|
||||
async def get_dataframe_from_uri(uri: str):
|
||||
df = None
|
||||
if uri.endswith(".csv"):
|
||||
df = pandas.read_csv(uri)
|
||||
# Moving to its own thread to avoid io from blocking the eventloop
|
||||
# This isn't ideal as it moves more than just the IO to a new thread
|
||||
# but it is as close as we can easily get
|
||||
df = await asyncio.to_thread(pandas.read_csv, uri)
|
||||
elif uri.endswith(".xlsx"):
|
||||
df = pandas.read_excel(uri)
|
||||
df = await asyncio.to_thread(pandas.read_excel, uri)
|
||||
elif uri.startswith("data:"):
|
||||
parts = parse_data_url(uri)
|
||||
data = parts["data"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue