(docs) encode docstring

ishaan-jaff 2023-10-30 14:10:29 -07:00
parent 313eee17e3
commit c61fa70ba0
2 changed files with 11 additions and 1 deletion


@@ -39,7 +39,7 @@ def test_encoding_and_decoding():
         # openai tokenizer
         openai_tokens = token_counter(model="gpt-3.5-turbo", text=sample_text)
-        openai_text = decode(model="gpt-3.5-turbo", tokens=openai_tokens)
+        openai_text = encode(model="gpt-3.5-turbo", tokens=openai_tokens)
         assert openai_text == sample_text
     except:
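
For context, a minimal sketch of the encode/decode round trip this test exercises, assuming encode, decode, and token_counter can be imported from the litellm package as in the test file (the import path and sample string below are assumptions):

# Round-trip sketch: encode text into token ids, decode them back,
# and check that the original string is recovered.
from litellm import decode, encode, token_counter

sample_text = "Hello World, this is my input string!"

# token_counter reports how many tokens the model's tokenizer produces
print(token_counter(model="gpt-3.5-turbo", text=sample_text))

# encode(text=...) returns token ids; decode(tokens=...) maps them back to text
tokens = encode(model="gpt-3.5-turbo", text=sample_text)
assert decode(model="gpt-3.5-turbo", tokens=tokens) == sample_text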


@@ -911,6 +911,16 @@ def _select_tokenizer(model: str):
     return {"type": "openai_tokenizer", "tokenizer": encoding}
 
 def encode(model: str, text: str):
+    """
+    Encodes the given text using the specified model.
+
+    Args:
+        model (str): The name of the model to use for tokenization.
+        text (str): The text to be encoded.
+
+    Returns:
+        enc: The encoded text.
+    """
     tokenizer_json = _select_tokenizer(model=model)
     enc = tokenizer_json["tokenizer"].encode(text)
     return enc
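
Taken together, the pattern is: _select_tokenizer picks a tokenizer for the model and wraps it in a small dict ({"type": ..., "tokenizer": ...}), and encode() unwraps that dict and calls its .encode(text) method. Below is a self-contained sketch of that pattern using tiktoken directly; only the openai_tokenizer branch from the hunk above is reproduced, and the cl100k_base fallback for unknown model names is an assumption, not necessarily litellm's actual behavior:

# Standalone sketch of the tokenizer-dispatch pattern shown above.
import tiktoken


def _select_tokenizer(model: str):
    # For OpenAI-style models, look up the model's encoding; fall back to
    # cl100k_base when the model name is unknown (fallback is an assumption).
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return {"type": "openai_tokenizer", "tokenizer": encoding}


def encode(model: str, text: str):
    # Unwrap the tokenizer dict and return the token ids for `text`.
    tokenizer_json = _select_tokenizer(model=model)
    enc = tokenizer_json["tokenizer"].encode(text)
    return enc


print(encode(model="gpt-3.5-turbo", text="Hello World"))  # prints a list of token ids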