diff --git a/litellm/__init__.py b/litellm/__init__.py
index 13e9f3dd3..cc6f88c0c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -276,7 +276,7 @@ provider_list: List = [
     "vllm",
     "nlp_cloud",
     "bedrock",
-    "petals,"
+    "petals",
     "custom", # custom apis
 ]

diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index e056d2049..648834540 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -32,8 +32,6 @@ def completion(

     model = model

-    # You could also use "meta-llama/Llama-2-70b-chat-hf" or any other supported model from 🤗 Model Hub
-
     tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, add_bos_token=False)
     model = AutoDistributedModelForCausalLM.from_pretrained(model)
     model = model.cuda()
@@ -76,6 +74,8 @@ def completion(
     print_verbose(f"raw model_response: {outputs}")
     ## RESPONSE OBJECT
     output_text = tokenizer.decode(outputs[0])
+    print("output text")
+    print(output_text)
     model_response["choices"][0]["message"]["content"] = output_text

     ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
diff --git a/litellm/main.py b/litellm/main.py
index eb209db7e..28292a6c4 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -959,8 +959,9 @@ def completion(
             or custom_llm_provider == "petals-team"
             or model in litellm.petals_models
         ):
-            custom_llm_provider = "baseten"
-
+            custom_llm_provider = "petals"
+            print("model on petals")
+            print(model)
             model_response = petals.completion(
                 model=model,
                 messages=messages,
@@ -970,7 +971,6 @@ def completion(
                 litellm_params=litellm_params,
                 logger_fn=logger_fn,
                 encoding=encoding,
-                api_key=baseten_key,
                 logging_obj=logging
             )
             if inspect.isgenerator(model_response) or (stream == True):
diff --git a/litellm/utils.py b/litellm/utils.py
index 648bb63e7..669611352 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1122,6 +1122,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
         ## nlp_cloud
         elif model in litellm.nlp_cloud_models:
             custom_llm_provider = "nlp_cloud"
+        ## petals
+        elif model in litellm.petals_models:
+            custom_llm_provider = "petals"

         if custom_llm_provider is None or custom_llm_provider=="":
             raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
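
Usage sketch (not part of the patch): with this change, Petals models are routed to the `petals` provider instead of the `baseten` branch, and `get_llm_provider` resolves them automatically. A minimal way to exercise the new path, assuming `litellm.petals_models` contains a Petals-served model such as `petals-team/StableBeluga2` (the model name here is illustrative). Actually running it needs the `petals` and `transformers` packages plus a CUDA device, since `petals.completion` calls `model.cuda()`.

```python
# Hedged example, not part of this patch: exercises the corrected routing.
import litellm

# The model name is an assumption for illustration; any entry of
# litellm.petals_models (or an explicit custom_llm_provider == "petals")
# should now reach petals.completion() rather than the baseten branch.
response = litellm.completion(
    model="petals-team/StableBeluga2",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)

# ModelResponse supports dict-style access, as used in petals.py above.
print(response["choices"][0]["message"]["content"])
```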