(Refactor) Code Quality improvement - Use Common base handler for anthropic_text/ (#7143)

* add anthropic text provider

* add ANTHROPIC_TEXT to LlmProviders

* fix anthropic text implementation

* working anthropic text claude-2

* test_acompletion_claude2_stream

* add param mapping for anthropic text

* fix unused imports

* fix anthropic completion handler.py
Ishaan Jaff 2024-12-10 12:23:58 -08:00 committed by GitHub
parent 5e016fe66a
commit bdb20821ea
12 changed files with 528 additions and 498 deletions
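
For reference, a minimal usage sketch of the route this commit introduces (an illustration, not part of the commit; it assumes ANTHROPIC_API_KEY is set and a litellm build that includes this change):

import litellm

# legacy text-completion Claude models now go through the common base handler;
# both "claude-2" and the explicit "anthropic_text/claude-2" slug resolve to /v1/complete
response = litellm.completion(
    model="anthropic_text/claude-2",
    messages=[{"role": "user", "content": "Generate a 3 liner joke for me"}],
    max_tokens=64,
)
print(response.choices[0].message.content)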

View file

@@ -846,6 +846,7 @@ class LlmProviders(str, Enum):
COHERE_CHAT = "cohere_chat"
CLARIFAI = "clarifai"
ANTHROPIC = "anthropic"
+ANTHROPIC_TEXT = "anthropic_text"
REPLICATE = "replicate"
HUGGINGFACE = "huggingface"
TOGETHER_AI = "together_ai"
@@ -1060,7 +1061,7 @@ from .llms.anthropic.experimental_pass_through.transformation import (
AnthropicExperimentalPassThroughConfig,
)
from .llms.groq.stt.transformation import GroqSTTConfig
-from .llms.anthropic.completion import AnthropicTextConfig
+from .llms.anthropic.completion.transformation import AnthropicTextConfig
from .llms.databricks.chat.transformation import DatabricksConfig
from .llms.databricks.embed.transformation import DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig

View file

@@ -12,6 +12,7 @@ LITELLM_CHAT_PROVIDERS = [
"cohere_chat",
"clarifai",
"anthropic",
+"anthropic_text",
"replicate",
"huggingface",
"together_ai",

View file

@@ -52,6 +52,39 @@ def handle_cohere_chat_model_custom_llm_provider(
return model, custom_llm_provider

+def handle_anthropic_text_model_custom_llm_provider(
+model: str, custom_llm_provider: Optional[str] = None
+) -> Tuple[str, Optional[str]]:
+"""
+if user sets model = "anthropic/claude-2" -> use custom_llm_provider = "anthropic_text"
+Args:
+model:
+custom_llm_provider:
+Returns:
+model, custom_llm_provider
+"""
+if custom_llm_provider:
+if (
+custom_llm_provider == "anthropic"
+and litellm.AnthropicTextConfig._is_anthropic_text_model(model)
+):
+return model, "anthropic_text"
+if "/" in model:
+_custom_llm_provider, _model = model.split("/", 1)
+if (
+_custom_llm_provider
+and _custom_llm_provider == "anthropic"
+and litellm.AnthropicTextConfig._is_anthropic_text_model(_model)
+):
+return _model, "anthropic_text"
+return model, custom_llm_provider

def get_llm_provider( # noqa: PLR0915
model: str,
custom_llm_provider: Optional[str] = None,
@@ -92,6 +125,10 @@ def get_llm_provider( # noqa: PLR0915
model, custom_llm_provider
)
+model, custom_llm_provider = handle_anthropic_text_model_custom_llm_provider(
+model, custom_llm_provider
+)
if custom_llm_provider:
if (
model.split("/")[0] == custom_llm_provider
@@ -210,7 +247,10 @@ def get_llm_provider( # noqa: PLR0915
custom_llm_provider = "text-completion-openai"
## anthropic
elif model in litellm.anthropic_models:
-custom_llm_provider = "anthropic"
+if litellm.AnthropicTextConfig._is_anthropic_text_model(model):
+custom_llm_provider = "anthropic_text"
+else:
+custom_llm_provider = "anthropic"
## cohere
elif model in litellm.cohere_models or model in litellm.cohere_embedding_models:
custom_llm_provider = "cohere"
@@ -531,7 +571,9 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
)
elif custom_llm_provider == "galadriel":
api_base = (
-api_base or get_secret("GALADRIEL_API_BASE") or "https://api.galadriel.com/v1"
+api_base
+or get_secret("GALADRIEL_API_BASE")
+or "https://api.galadriel.com/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("GALADRIEL_API_KEY")
if api_base is not None and not isinstance(api_base, str):
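
A sketch of the routing behaviour the new helper introduces (assumes `get_llm_provider` is importable from the top-level `litellm` module, as in recent releases, and returns a `(model, custom_llm_provider, dynamic_api_key, api_base)` tuple):

import litellm

# claude-2 / claude-instant-1 are re-routed to the dedicated "anthropic_text" provider
model, provider, _, _ = litellm.get_llm_provider(model="anthropic/claude-2")
assert (model, provider) == ("claude-2", "anthropic_text")

# newer Anthropic models keep the default /v1/messages route
model, provider, _, _ = litellm.get_llm_provider(model="claude-3-5-sonnet-20240620")
assert provider == "anthropic"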

View file

@@ -223,40 +223,6 @@ class CustomStreamWrapper:
self.holding_chunk = ""
return hold, curr_chunk

-def handle_anthropic_text_chunk(self, chunk):
-"""
-For old anthropic models - claude-1, claude-2.
-Claude-3 is handled from within Anthropic.py VIA ModelResponseIterator()
-"""
-str_line = chunk
-if isinstance(chunk, bytes): # Handle binary data
-str_line = chunk.decode("utf-8") # Convert bytes to string
-text = ""
-is_finished = False
-finish_reason = None
-if str_line.startswith("data:"):
-data_json = json.loads(str_line[5:])
-type_chunk = data_json.get("type", None)
-if type_chunk == "completion":
-text = data_json.get("completion")
-finish_reason = data_json.get("stop_reason")
-if finish_reason is not None:
-is_finished = True
-return {
-"text": text,
-"is_finished": is_finished,
-"finish_reason": finish_reason,
-}
-elif "error" in str_line:
-raise ValueError(f"Unable to parse response. Original response: {str_line}")
-else:
-return {
-"text": text,
-"is_finished": is_finished,
-"finish_reason": finish_reason,
-}

def handle_predibase_chunk(self, chunk):
try:
if not isinstance(chunk, str):
@@ -1005,14 +971,6 @@ class CustomStreamWrapper:
setattr(model_response, key, value)
response_obj = anthropic_response_obj
-elif (
-self.custom_llm_provider
-and self.custom_llm_provider == "anthropic_text"
-):
-response_obj = self.handle_anthropic_text_chunk(chunk)
-completion_obj["content"] = response_obj["text"]
-if response_obj["is_finished"]:
-self.received_finish_reason = response_obj["finish_reason"]
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
response_obj = self.handle_replicate_chunk(chunk)
completion_obj["content"] = response_obj["text"]

View file

@@ -1,362 +0,0 @@
"""
Translation logic for anthropic's `/v1/complete` endpoint
"""
import json
import os
import time
import types
from enum import Enum
from typing import Callable, Optional
import httpx
import requests
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ..base import BaseLLM
from ..prompt_templates.factory import custom_prompt, prompt_factory
class AnthropicConstants(Enum):
HUMAN_PROMPT = "\n\nHuman: "
AI_PROMPT = "\n\nAssistant: "
class AnthropicError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/complete"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicTextConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/complete_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens_to_sample: Optional[int] = (
litellm.max_tokens
) # anthropic requires a default
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
def __init__(
self,
max_tokens_to_sample: Optional[int] = 256, # anthropic requires a default
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
# makes headers for API call
def validate_environment(api_key, user_headers):
if api_key is None:
raise ValueError(
"Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
)
headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": api_key,
}
if user_headers is not None and isinstance(user_headers, dict):
headers = {**headers, **user_headers}
return headers
class AnthropicTextCompletion(BaseLLM):
def __init__(self) -> None:
super().__init__()
def _process_response(
self, model_response: ModelResponse, response, encoding, prompt: str, model: str
):
## RESPONSE OBJECT
try:
completion_response = response.json()
except Exception:
raise AnthropicError(
message=response.text, status_code=response.status_code
)
if "error" in completion_response:
raise AnthropicError(
message=str(completion_response["error"]),
status_code=response.status_code,
)
else:
if len(completion_response["completion"]) > 0:
model_response.choices[0].message.content = completion_response[ # type: ignore
"completion"
]
model_response.choices[0].finish_reason = completion_response["stop_reason"]
## CALCULATING USAGE
prompt_tokens = len(
encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
) ##[TODO] use the anthropic tokenizer here
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
setattr(model_response, "usage", usage)
return model_response
async def async_completion(
self,
model: str,
model_response: ModelResponse,
api_base: str,
logging_obj,
encoding,
headers: dict,
data: dict,
client=None,
):
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC,
params={"timeout": httpx.Timeout(timeout=600.0, connect=5.0)},
)
response = await client.post(api_base, headers=headers, data=json.dumps(data))
if response.status_code != 200:
raise AnthropicError(
status_code=response.status_code, message=response.text
)
## LOGGING
logging_obj.post_call(
input=data["prompt"],
api_key=headers.get("x-api-key"),
original_response=response.text,
additional_args={"complete_input_dict": data},
)
response = self._process_response(
model_response=model_response,
response=response,
encoding=encoding,
prompt=data["prompt"],
model=model,
)
return response
async def async_streaming(
self,
model: str,
api_base: str,
logging_obj,
headers: dict,
data: Optional[dict],
client=None,
):
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC,
params={"timeout": httpx.Timeout(timeout=600.0, connect=5.0)},
)
response = await client.post(api_base, headers=headers, data=json.dumps(data))
if response.status_code != 200:
raise AnthropicError(
status_code=response.status_code, message=response.text
)
completion_stream = response.aiter_lines()
streamwrapper = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic_text",
logging_obj=logging_obj,
)
return streamwrapper
def completion(
self,
model: str,
messages: list,
api_base: str,
acompletion: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
api_key,
logging_obj,
optional_params: dict,
litellm_params=None,
logger_fn=None,
headers={},
client=None,
):
headers = validate_environment(api_key, headers)
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
prompt = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
## Load Config
config = litellm.AnthropicTextConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
data = {
"model": model,
"prompt": prompt,
**optional_params,
}
## LOGGING
logging_obj.pre_call(
input=prompt,
api_key=api_key,
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
## COMPLETION CALL
if "stream" in optional_params and optional_params["stream"] is True:
if acompletion is True:
return self.async_streaming(
model=model,
api_base=api_base,
logging_obj=logging_obj,
headers=headers,
data=data,
client=None,
)
if client is None:
client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = client.post(
api_base,
headers=headers,
data=json.dumps(data),
# stream=optional_params["stream"],
)
if response.status_code != 200:
raise AnthropicError(
status_code=response.status_code, message=response.text
)
completion_stream = response.iter_lines()
stream_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic_text",
logging_obj=logging_obj,
)
return stream_response
elif acompletion is True:
return self.async_completion(
model=model,
model_response=model_response,
api_base=api_base,
logging_obj=logging_obj,
encoding=encoding,
headers=headers,
data=data,
client=client,
)
else:
if client is None:
client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = client.post(api_base, headers=headers, data=json.dumps(data))
if response.status_code != 200:
raise AnthropicError(
status_code=response.status_code, message=response.text
)
## LOGGING
logging_obj.post_call(
input=prompt,
api_key=api_key,
original_response=response.text,
additional_args={"complete_input_dict": data},
)
print_verbose(f"raw model_response: {response.text}")
response = self._process_response(
model_response=model_response,
response=response,
encoding=encoding,
prompt=data["prompt"],
model=model,
)
return response
def embedding(self):
# logic for parsing in - calling - parsing out model embedding calls
pass

View file

@@ -0,0 +1,5 @@
"""
Anthropic /complete API - uses `llm_http_handler.py` to make httpx requests
Request/Response transformation is handled in `transformation.py`
"""

View file

@@ -0,0 +1,307 @@
"""
Translation logic for anthropic's `/v1/complete` endpoint
Litellm provider slug: `anthropic_text/<model_name>`
"""
import json
import time
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
import httpx
import litellm
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.transformation import (
BaseConfig,
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.llms.prompt_templates.factory import custom_prompt, prompt_factory
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import (
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
GenericStreamingChunk,
ModelResponse,
Usage,
)
class AnthropicTextError(BaseLLMException):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/complete"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
message=self.message,
status_code=self.status_code,
request=self.request,
response=self.response,
) # Call the base class constructor with the parameters it needs
class AnthropicTextConfig(BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/complete_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens_to_sample: Optional[int] = (
litellm.max_tokens
) # anthropic requires a default
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
def __init__(
self,
max_tokens_to_sample: Optional[int] = 256, # anthropic requires a default
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
# makes headers for API call
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
api_key: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
)
_headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": api_key,
}
headers.update(_headers)
return headers
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
prompt = self._get_anthropic_text_prompt_from_messages(
messages=messages, model=model
)
## Load Config
config = litellm.AnthropicTextConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
data = {
"model": model,
"prompt": prompt,
**optional_params,
}
return data
def get_supported_openai_params(self, model: str):
"""
Anthropic /complete API Ref: https://docs.anthropic.com/en/api/complete
"""
return [
"stream",
"max_tokens",
"max_completion_tokens",
"stop",
"temperature",
"top_p",
"extra_headers",
"user",
]
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
"""
Follows the same logic as the AnthropicConfig.map_openai_params method (which is the Anthropic /messages API)
Note: the only difference is in the get supported openai params method between the AnthropicConfig and AnthropicTextConfig
API Ref: https://docs.anthropic.com/en/api/complete
"""
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens_to_sample"] = value
if param == "max_completion_tokens":
optional_params["max_tokens_to_sample"] = value
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop" and (isinstance(value, str) or isinstance(value, list)):
_value = litellm.AnthropicConfig()._map_stop_sequences(value)
if _value is not None:
optional_params["stop_sequences"] = _value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "user":
optional_params["metadata"] = {"user_id": value}
return optional_params
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
request_data: dict,
messages: List[AllMessageValues],
optional_params: dict,
encoding: str,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
try:
completion_response = raw_response.json()
except Exception:
raise AnthropicTextError(
message=raw_response.text, status_code=raw_response.status_code
)
prompt = self._get_anthropic_text_prompt_from_messages(
messages=messages, model=model
)
if "error" in completion_response:
raise AnthropicTextError(
message=str(completion_response["error"]),
status_code=raw_response.status_code,
)
else:
if len(completion_response["completion"]) > 0:
model_response.choices[0].message.content = completion_response[ # type: ignore
"completion"
]
model_response.choices[0].finish_reason = completion_response["stop_reason"]
## CALCULATING USAGE
prompt_tokens = len(
encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
) ##[TODO] use the anthropic tokenizer here
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
setattr(model_response, "usage", usage)
return model_response
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:
return AnthropicTextError(
status_code=status_code,
message=error_message,
)
@staticmethod
def _is_anthropic_text_model(model: str) -> bool:
return model == "claude-2" or model == "claude-instant-1"
def _get_anthropic_text_prompt_from_messages(
self, messages: List[AllMessageValues], model: str
) -> str:
custom_prompt_dict = litellm.custom_prompt_dict
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
prompt = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
return str(prompt)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
"Not required"
raise NotImplementedError
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
return AnthropicTextCompletionResponseIterator(
streaming_response=streaming_response,
sync_stream=sync_stream,
json_mode=json_mode,
)
class AnthropicTextCompletionResponseIterator(BaseModelResponseIterator):
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
try:
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ChatCompletionUsageBlock] = None
provider_specific_fields = None
index = int(chunk.get("index", 0))
_chunk_text = chunk.get("completion", None)
if _chunk_text is not None and isinstance(_chunk_text, str):
text = _chunk_text
finish_reason = chunk.get("stop_reason", None)
if finish_reason is not None:
is_finished = True
returned_chunk = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=index,
provider_specific_fields=provider_specific_fields,
)
return returned_chunk
except json.JSONDecodeError:
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")

View file

@@ -2839,7 +2839,7 @@ def prompt_factory(
if custom_llm_provider == "ollama":
return ollama_pt(model=model, messages=messages)
elif custom_llm_provider == "anthropic":
-if model == "claude-instant-1" or model == "claude-2":
+if litellm.AnthropicTextConfig._is_anthropic_text_model(model):
return anthropic_pt(messages=messages)
return anthropic_messages_pt(
messages=messages, model=model, llm_provider=custom_llm_provider
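
For reference, `anthropic_pt` renders chat messages into the legacy Human/Assistant prompt string that /v1/complete expects, which is why the old text models need their own branch here. A rough sketch of the expected shape (the helper's exact output may differ slightly):

messages = [{"role": "user", "content": "Tell me a joke."}]

# approximate prompt handed to /v1/complete for claude-2 / claude-instant-1
expected_prompt = "\n\nHuman: Tell me a joke.\n\nAssistant: "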

View file

@@ -99,7 +99,6 @@ from .llms import (
)
from .llms.ai21 import completion as ai21
from .llms.anthropic.chat import AnthropicChatCompletion
-from .llms.anthropic.completion import AnthropicTextCompletion
from .llms.azure.audio_transcriptions import AzureAudioTranscription
from .llms.azure.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.azure.chat.o1_handler import AzureOpenAIO1ChatCompletion
@@ -204,7 +203,6 @@ together_ai_text_completions = TogetherAITextCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
-anthropic_text_completions = AnthropicTextCompletion()
azure_chat_completions = AzureChatCompletion()
azure_o1_chat_completions = AzureOpenAIO1ChatCompletion()
azure_text_completions = AzureTextCompletion()
@@ -464,6 +462,7 @@ async def acompletion(
or custom_llm_provider == "sagemaker"
or custom_llm_provider == "sagemaker_chat"
or custom_llm_provider == "anthropic"
+or custom_llm_provider == "anthropic_text"
or custom_llm_provider == "predibase"
or custom_llm_provider == "bedrock"
or custom_llm_provider == "databricks"
@@ -1705,6 +1704,41 @@ def completion( # type: ignore # noqa: PLR0915
api_key=clarifai_key,
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
)
+elif custom_llm_provider == "anthropic_text":
+api_key = (
+api_key
+or litellm.anthropic_key
+or litellm.api_key
+or os.environ.get("ANTHROPIC_API_KEY")
+)
+custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
+api_base = (
+api_base
+or litellm.api_base
+or get_secret("ANTHROPIC_API_BASE")
+or get_secret("ANTHROPIC_BASE_URL")
+or "https://api.anthropic.com/v1/complete"
+)
+if api_base is not None and not api_base.endswith("/v1/complete"):
+api_base += "/v1/complete"
+response = base_llm_http_handler.completion(
+model=model,
+stream=stream,
+messages=messages,
+acompletion=acompletion,
+api_base=api_base,
+model_response=model_response,
+optional_params=optional_params,
+litellm_params=litellm_params,
+custom_llm_provider="anthropic_text",
+timeout=timeout,
+headers=headers,
+encoding=encoding,
+api_key=api_key,
+logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
+)
elif custom_llm_provider == "anthropic":
api_key = (
api_key
@@ -1713,69 +1747,38 @@ def completion( # type: ignore # noqa: PLR0915
or os.environ.get("ANTHROPIC_API_KEY")
)
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
-if (model == "claude-2") or (model == "claude-instant-1"):
-# call anthropic /completion, only use this route for claude-2, claude-instant-1
-api_base = (
-api_base
-or litellm.api_base
-or get_secret("ANTHROPIC_API_BASE")
-or get_secret("ANTHROPIC_BASE_URL")
-or "https://api.anthropic.com/v1/complete"
-)
-if api_base is not None and not api_base.endswith("/v1/complete"):
-api_base += "/v1/complete"
-response = anthropic_text_completions.completion(
-model=model,
-messages=messages,
-api_base=api_base,
-acompletion=acompletion,
-custom_prompt_dict=litellm.custom_prompt_dict,
-model_response=model_response,
-print_verbose=print_verbose,
-optional_params=optional_params,
-litellm_params=litellm_params,
-logger_fn=logger_fn,
-encoding=encoding, # for calculating input/output tokens
-api_key=api_key,
-logging_obj=logging,
-headers=headers,
-)
-else:
-# call /messages
-# default route for all anthropic models
-api_base = (
-api_base
-or litellm.api_base
-or get_secret("ANTHROPIC_API_BASE")
-or get_secret("ANTHROPIC_BASE_URL")
-or "https://api.anthropic.com/v1/messages"
-)
-if api_base is not None and not api_base.endswith("/v1/messages"):
-api_base += "/v1/messages"
-response = anthropic_chat_completions.completion(
-model=model,
-messages=messages,
-api_base=api_base,
-acompletion=acompletion,
-custom_prompt_dict=litellm.custom_prompt_dict,
-model_response=model_response,
-print_verbose=print_verbose,
-optional_params=optional_params,
-litellm_params=litellm_params,
-logger_fn=logger_fn,
-encoding=encoding, # for calculating input/output tokens
-api_key=api_key,
-logging_obj=logging,
-headers=headers,
-timeout=timeout,
-client=client,
-custom_llm_provider=custom_llm_provider,
-)
+# call /messages
+# default route for all anthropic models
+api_base = (
+api_base
+or litellm.api_base
+or get_secret("ANTHROPIC_API_BASE")
+or get_secret("ANTHROPIC_BASE_URL")
+or "https://api.anthropic.com/v1/messages"
+)
+if api_base is not None and not api_base.endswith("/v1/messages"):
+api_base += "/v1/messages"
+response = anthropic_chat_completions.completion(
+model=model,
+messages=messages,
+api_base=api_base,
+acompletion=acompletion,
+custom_prompt_dict=litellm.custom_prompt_dict,
+model_response=model_response,
+print_verbose=print_verbose,
+optional_params=optional_params,
+litellm_params=litellm_params,
+logger_fn=logger_fn,
+encoding=encoding, # for calculating input/output tokens
+api_key=api_key,
+logging_obj=logging,
+headers=headers,
+timeout=timeout,
+client=client,
+custom_llm_provider=custom_llm_provider,
+)
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
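
One practical effect of the new `anthropic_text` branch above: a custom ANTHROPIC_API_BASE (for example a proxy) gets "/v1/complete" appended automatically when it does not already end with it. A hedged sketch, using a hypothetical proxy URL:

import os
import litellm

os.environ["ANTHROPIC_API_BASE"] = "https://anthropic-proxy.internal.example.com"  # hypothetical

# litellm should call https://anthropic-proxy.internal.example.com/v1/complete here
response = litellm.completion(
    model="anthropic_text/claude-instant-1",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)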

View file

@@ -2830,6 +2830,32 @@ def get_optional_params( # noqa: PLR0915
else False
),
)
+elif custom_llm_provider == "anthropic_text":
+supported_params = get_supported_openai_params(
+model=model, custom_llm_provider=custom_llm_provider
+)
+optional_params = litellm.AnthropicTextConfig().map_openai_params(
+model=model,
+non_default_params=non_default_params,
+optional_params=optional_params,
+drop_params=(
+drop_params
+if drop_params is not None and isinstance(drop_params, bool)
+else False
+),
+)
+_check_valid_arg(supported_params=supported_params)
+optional_params = litellm.AnthropicTextConfig().map_openai_params(
+model=model,
+non_default_params=non_default_params,
+optional_params=optional_params,
+drop_params=(
+drop_params
+if drop_params is not None and isinstance(drop_params, bool)
+else False
+),
+)
elif custom_llm_provider == "cohere":
## check if unsupported param passed in
supported_params = get_supported_openai_params(
@@ -4208,7 +4234,7 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
if llm_provider == "openai" or llm_provider == "text-completion-openai":
api_key = api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
# anthropic
-elif llm_provider == "anthropic":
+elif llm_provider == "anthropic" or llm_provider == "anthropic_text":
api_key = api_key or litellm.anthropic_key or get_secret("ANTHROPIC_API_KEY")
# ai21
elif llm_provider == "ai21":
@@ -6251,6 +6277,8 @@ class ProviderConfigManager:
return litellm.ClarifaiConfig()
elif litellm.LlmProviders.ANTHROPIC == provider:
return litellm.AnthropicConfig()
+elif litellm.LlmProviders.ANTHROPIC_TEXT == provider:
+return litellm.AnthropicTextConfig()
elif litellm.LlmProviders.VERTEX_AI == provider:
if "claude" in model:
return litellm.VertexAIAnthropicConfig()

View file

@@ -0,0 +1,73 @@
import asyncio
import os
from re import T
import sys
import traceback
from dotenv import load_dotenv
import litellm.types
import litellm.types.utils
from litellm.llms.anthropic.chat import ModelResponseIterator
load_dotenv()
import io
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional
from unittest.mock import MagicMock, patch
import pytest
@pytest.mark.asyncio
@pytest.mark.parametrize("model", ["claude-2", "anthropic/claude-2"])
async def test_acompletion_claude2(model):
try:
litellm.set_verbose = True
messages = [
{
"role": "system",
"content": "Your goal is generate a joke on the topic user gives.",
},
{"role": "user", "content": "Generate a 3 liner joke for me"},
]
# test without max-tokens
response = await litellm.acompletion(model=model, messages=messages)
# Add any assertions here to check the response
print(response)
print(response.usage)
print(response.usage.completion_tokens)
print(response["usage"]["completion_tokens"])
# print("new cost tracking")
except litellm.InternalServerError:
pytest.skip("model is overloaded.")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio
async def test_acompletion_claude2_stream():
try:
litellm.set_verbose = False
messages = [
{
"role": "system",
"content": "Your goal is generate a joke on the topic user gives.",
},
{"role": "user", "content": "Generate a 3 liner joke for me"},
]
# test without max-tokens
response = await litellm.acompletion(
model="anthropic_text/claude-2",
messages=messages,
stream=True,
max_tokens=10,
)
async for chunk in response:
print(chunk)
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@@ -1126,32 +1126,6 @@ def test_completion_mistral_api_modified_input():
pytest.fail(f"Error occurred: {e}")

-@pytest.mark.asyncio
-async def test_acompletion_claude2_1():
-try:
-litellm.set_verbose = True
-print("claude2.1 test request")
-messages = [
-{
-"role": "system",
-"content": "Your goal is generate a joke on the topic user gives.",
-},
-{"role": "user", "content": "Generate a 3 liner joke for me"},
-]
-# test without max-tokens
-response = await litellm.acompletion(model="claude-2.1", messages=messages)
-# Add any assertions here to check the response
-print(response)
-print(response.usage)
-print(response.usage.completion_tokens)
-print(response["usage"]["completion_tokens"])
-# print("new cost tracking")
-except litellm.InternalServerError:
-pytest.skip("model is overloaded.")
-except Exception as e:
-pytest.fail(f"Error occurred: {e}")

# def test_completion_oobabooga():
# try:
# response = completion(