Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
adding anthropic llm class - handles sync + stream
Commit bc767cc42a (parent e6da2f8bf7)
27 changed files with 219 additions and 693 deletions
litellm/llms/anthropic.py (new file, 97 lines)

@@ -0,0 +1,97 @@
import os
import json
import time
from enum import Enum
from typing import Callable

import requests
import sseclient  # pulled in for SSE streaming support; not yet used in this revision

from litellm import logging


class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman:"
    AI_PROMPT = "\n\nAssistant:"


class AnthropicLLM:
    def __init__(self, default_max_tokens_to_sample, api_key=None):
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
        self.completion_url = "https://api.anthropic.com/v1/complete"
        self.validate_environment(api_key=api_key)

    def validate_environment(self, api_key):  # set up the environment required to run the model
        # prefer the environment variable, falling back to the api_key param
        self.api_key = os.getenv("ANTHROPIC_API_KEY", api_key)
        if self.api_key is None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        self.headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": self.api_key,
        }

    def _stream(self):  # logic for handling streaming with the LLM API
        pass

    def completion(
        self,
        model: str,
        messages: list,
        model_response: dict,
        print_verbose: Callable,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):  # logic for parsing in - calling - parsing out model completion calls
        optional_params = optional_params or {}  # guard against callers passing None
        # build the "\n\nHuman: ...\n\nAssistant:" turn-based prompt the v1/complete API
        # expects; each message contributes its own turn prefix, and the trailing
        # AI_PROMPT cues the model to produce the assistant's reply
        prompt = ""
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
                    prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
                else:
                    prompt += f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
            else:
                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
        if "max_tokens" in optional_params and optional_params["max_tokens"] != float("inf"):
            max_tokens = optional_params["max_tokens"]
        else:
            max_tokens = self.default_max_tokens_to_sample
        data = {
            "model": model,
            "prompt": prompt,
            "max_tokens_to_sample": max_tokens,
            **optional_params,
        }

        ## LOGGING
        logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
        ## COMPLETION CALL
        response = requests.post(
            self.completion_url,
            headers=self.headers,
            data=json.dumps(data),
            stream=optional_params.get("stream", False),  # keep the connection open for SSE
        )
        if optional_params.get("stream"):
            # hand the raw line iterator back to the caller, who owns the SSE decoding
            return response.iter_lines()
        else:
            ## LOGGING
            logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
            print_verbose(f"completion_response: {completion_response}")
            if "error" in completion_response:
                raise Exception(completion_response["error"])
            else:
                model_response["choices"][0]["message"]["content"] = completion_response["completion"]

            ## CALCULATING USAGE - placeholder zeros until token counting is wired in
            prompt_tokens = 0
            completion_tokens = 0

            model_response["created"] = time.time()
            model_response["model"] = model
            model_response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            return model_response

    def embedding(self):  # logic for parsing in - calling - parsing out model embedding calls
        pass

    def stream(self):  # logic for how to parse in-out model completion streams
        pass
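For context, a minimal sketch of how the new class might be driven directly. Only the AnthropicLLM interface comes from the diff above; the skeleton model_response dict and the model name are assumptions (litellm's main completion path normally constructs and passes these).

# Hypothetical direct usage of the class added in this commit (not part of the diff).
from litellm.llms.anthropic import AnthropicLLM

client = AnthropicLLM(default_max_tokens_to_sample=256)  # reads ANTHROPIC_API_KEY from the env

# Skeleton response object; the real shape is supplied by litellm's completion() entry point.
model_response = {"choices": [{"message": {"role": "assistant", "content": ""}}]}

result = client.completion(
    model="claude-instant-1",  # assumed model name, for illustration only
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    model_response=model_response,
    print_verbose=print,  # any callable works; litellm passes its own verbose logger
    optional_params={},   # e.g. {"temperature": 0.7}
    litellm_params={},
    logger_fn=None,
)
print(result["choices"][0]["message"]["content"])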
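Since completion() simply returns response.iter_lines() when stream is set, the caller owns the SSE decoding. Below is a hedged sketch of one way to consume it; the "data: {...}" line format carrying a "completion" field reflects my understanding of Anthropic's v1/complete SSE events at this API version, and is an assumption here, not something this diff establishes.

# Hypothetical streaming consumption, reusing `client` and `model_response` from above.
import json

lines = client.completion(
    model="claude-instant-1",  # assumed model name, for illustration only
    messages=[{"role": "user", "content": "Count to three."}],
    model_response=model_response,
    print_verbose=print,
    optional_params={"stream": True},  # flows into the request body via **optional_params
    litellm_params={},
    logger_fn=None,
)
for raw_line in lines:  # response.iter_lines() yields bytes, with b"" for keep-alives
    if not raw_line:
        continue
    decoded = raw_line.decode("utf-8")
    if decoded.startswith("data:"):  # SSE payload line (assumed event format)
        event = json.loads(decoded[len("data:"):].strip())
        print(event.get("completion", ""), end="", flush=True)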