litellm-mirror/litellm/llms/nlp_cloud/chat/handler.py
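"""NLP Cloud chat completion handler.

Request/response shaping lives in NLPCloudConfig (see .transformation in this
package); this module wires up the HTTP transport and streaming chunk cleanup.
"""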

import json
from typing import Callable, Optional, Union

import litellm
from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
    _get_httpx_client,
)
from litellm.utils import ModelResponse

from .transformation import NLPCloudConfig

nlp_config = NLPCloudConfig()

def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params: dict,
    litellm_params: dict,
    logger_fn=None,
    default_max_tokens_to_sample=None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    headers: Optional[dict] = None,  # avoid a shared mutable default dict
):
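    """Synchronous chat completion against NLP Cloud's text generation endpoint.

    Returns a litellm ModelResponse, or a generator of decoded text chunks when
    optional_params["stream"] is True.
    """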
    headers = nlp_config.validate_environment(
        api_key=api_key,
        headers=headers or {},
        model=model,
        messages=messages,
        optional_params=optional_params,
    )

    ## Load Config
    config = litellm.NLPCloudConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # caller-supplied params take precedence over NLPCloudConfig defaults
            optional_params[k] = v
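    # Example: litellm.NLPCloudConfig(max_length=256) sets a module-level default,
    # but an explicit completion(..., max_length=512) call still wins, since keys
    # already present in optional_params are never overwritten.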

    completion_url = api_base + model + "/generation"
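    # e.g. with the default api_base "https://api.nlpcloud.io/v1/gpu/" and
    # model "finetuned-llama-3-70b", this yields
    # "https://api.nlpcloud.io/v1/gpu/finetuned-llama-3-70b/generation".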
    data = nlp_config.transform_request(
        model=model,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        headers=headers,
    )

    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )

    ## COMPLETION CALL
    if client is None or not isinstance(client, HTTPHandler):
        client = _get_httpx_client()
    response = client.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params.get("stream", False),
    )
if "stream" in optional_params and optional_params["stream"] is True:
return clean_and_iterate_chunks(response)
else:
return nlp_config.transform_response(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key=api_key,
request_data=data,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
)


# def clean_and_iterate_chunks(response):
#     def process_chunk(chunk):
#         print(f"received chunk: {chunk}")
#         cleaned_chunk = chunk.decode("utf-8")
#         # Perform further processing based on your needs
#         return cleaned_chunk
#
#     for line in response.iter_lines():
#         if line:
#             yield process_chunk(line)
def clean_and_iterate_chunks(response):
    buffer = b""
    # httpx responses stream via iter_bytes (requests-style iter_content does
    # not exist on httpx), so iterate raw bytes and strip the NUL padding the
    # stream can contain before decoding.
    for chunk in response.iter_bytes(chunk_size=1024):
        if not chunk:
            break
        buffer += chunk

        while b"\x00" in buffer:
            buffer = buffer.replace(b"\x00", b"")
            yield buffer.decode("utf-8")
            buffer = b""

    # No more data expected, yield any remaining data in the buffer
    if buffer:
        yield buffer.decode("utf-8")
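
# A minimal illustration (test stub, not part of the handler) of how
# clean_and_iterate_chunks behaves: NUL bytes are stripped and each padded
# chunk is yielded as decoded text. _FakeStreamResponse is a hypothetical
# stand-in for the streamed httpx response.
#
# class _FakeStreamResponse:
#     def iter_bytes(self, chunk_size=1024):
#         yield b"Hello\x00"
#         yield b" world"
#
# for text in clean_and_iterate_chunks(_FakeStreamResponse()):
#     print(text)  # -> "Hello", then " world" once the stream ends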


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
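
# A rough, commented-out sketch of what the embedding flow could look like,
# mirroring completion() above. The endpoint path ("/embeddings"), the request
# shape ({"sentences": [...]}), and the response parsing are assumptions about
# NLP Cloud's API, not confirmed here.
#
# def embedding(model, input, api_base, api_key):
#     headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
#     client = _get_httpx_client()
#     response = client.post(
#         api_base + model + "/embeddings",
#         headers=headers,
#         data=json.dumps({"sentences": input}),
#     )
#     return response.json()["embeddings"]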