[bugfix] fix prompt_adapter interleaved_content_convert_to_raw (#696)

# What does this PR do?

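- Fix `interleaved_content_convert_to_raw` in `prompt_adapter` so that it
correctly converts an `ImageContentItem` to a `RawMediaItem` holding the raw
image bytes: the bytes are loaded from `c.url` (data URL, `file://`, or
`http`) when a URL is set, taken from `c.data` otherwise, and a `ValueError`
is raised when neither is provided.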

## Test Plan

```
torchrun $CONDA_PREFIX/bin/pytest -v -s -k "meta_reference" --inference-model="meta-llama/Llama-3.2-11B-Vision-Instruct" ./llama_stack/providers/tests/inference/test_vision_inference.py
```

**Before**
<img width="844" alt="image"
src="https://github.com/user-attachments/assets/f2784b42-2e36-4477-9041-903d5d628a68"
/>


**After**
<img width="836" alt="image"
src="https://github.com/user-attachments/assets/362b6e47-29f7-4119-bcf3-f75db842735f"
/>


## Sources

Please link relevant resources if necessary.


## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the Pull Request section of the
[contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md).
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
Xi Yan 2024-12-30 16:40:36 -08:00 committed by GitHub
parent 7c1e3daa75
commit a6c206ea66
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -40,7 +40,6 @@ from llama_stack.apis.common.content_types import (
InterleavedContent,
InterleavedContentItem,
TextContentItem,
URL,
)
from llama_stack.apis.inference import (
@ -117,27 +116,31 @@ async def interleaved_content_convert_to_raw(
elif isinstance(c, TextContentItem):
return RawTextItem(text=c.text)
elif isinstance(c, ImageContentItem):
# load image and return PIL version
img = c.data
if isinstance(img, URL):
if img.uri.startswith("data"):
match = re.match(r"data:image/(\w+);base64,(.+)", img.uri)
if c.url:
# Load image bytes from URL
if c.url.uri.startswith("data"):
match = re.match(r"data:image/(\w+);base64,(.+)", c.url.uri)
if not match:
raise ValueError("Invalid data URL format")
raise ValueError(
f"Invalid data URL format, {c.url.uri[:40]}..."
)
_, image_data = match.groups()
data = base64.b64decode(image_data)
elif img.uri.startswith("file://"):
path = img.uri[len("file://") :]
elif c.url.uri.startswith("file://"):
path = c.url.uri[len("file://") :]
with open(path, "rb") as f:
data = f.read() # type: ignore
elif img.uri.startswith("http"):
elif c.url.uri.startswith("http"):
async with httpx.AsyncClient() as client:
response = await client.get(img.uri)
response = await client.get(c.url.uri)
data = response.content
else:
raise ValueError("Unsupported URL type")
else:
elif c.data:
data = c.data
else:
raise ValueError("No data or URL provided")
return RawMediaItem(data=data)
else:
raise ValueError(f"Unsupported content type: {type(c)}")