mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 20:14:13 +00:00
feat: add embedding and dynamic model support to Together inference adapter
- updated to use OpenAIMixin
- workarounds for Together API quirks
- recordings for together suite when subdirs=inference,pattern=openai

Test with: `TOGETHER_API_KEY=_NONE_ ./scripts/integration-tests.sh --stack-config server:ci-tests --setup together --subdirs inference --pattern openai`
This commit is contained in:
parent
f4ab154ade
commit
3e02dc5c2f
20 changed files with 9227 additions and 180 deletions
|
@@ -96,6 +96,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
|
|||
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
|
||||
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
|
||||
"remote::tgi", # TGI ignores n param silently
|
||||
"remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream`
|
||||
):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue