mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-15 14:43:48 +00:00
feat: together now supports base64 embedding encoding (#3559)
# What does this PR do? use together's new base64 support ## Test Plan recordings for: ./scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup together --subdirs inference --pattern openai
This commit is contained in:
parent
9c751b6789
commit
65e01b5684
27 changed files with 15951 additions and 7 deletions
61
tests/integration/recordings/responses/6841bb14fa8d.json
Normal file
61
tests/integration/recordings/responses/6841bb14fa8d.json
Normal file
|
@ -0,0 +1,61 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.together.xyz/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace openai with temperature 0"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100,
|
||||
"stream": false,
|
||||
"temperature": 0.7
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "oCfxEyX-4Yz4kd-984c2b58fd3f4d13",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "When using a language model like OpenAI with a temperature of 0, the model is essentially forced to produce the most likely next word in a sequence, given the context. This means that the output will be very deterministic and less diverse, as the model is not allowed to explore less likely options.\n\nHere's an example of how this could work in practice:\n\n**Prompt:** Write a short story about a character who discovers a hidden world.\n\n**Temperature 0 Response:**\nIn a small village nestled",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": []
|
||||
},
|
||||
"seed": 9269366008132817000
|
||||
}
|
||||
],
|
||||
"created": 1758820586,
|
||||
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 100,
|
||||
"prompt_tokens": 43,
|
||||
"total_tokens": 143,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null,
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"prompt": []
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue