SentencePiece tokenizer for the language given by `lang`.
SentencePieceTokenizer(
lang = "en",
special_toks = NULL,
sp_model = NULL,
vocab_sz = NULL,
max_vocab_sz = 30000,
model_type = "unigram",
char_coverage = NULL,
cache_dir = "tmp"
)
lang: language of the texts to be tokenized (default "en")
special_toks: special tokens to include in the vocabulary, if any (default NULL)
sp_model: path to a previously trained SentencePiece model to reuse instead of training a new one (default NULL)
special_toks and sp_model above are optional; when sp_model is NULL a model is trained and cached in cache_dir.
vocab_sz: vocabulary size to train with; if NULL, presumably derived automatically and capped by max_vocab_sz (confirm against the implementation)
max_vocab_sz: upper bound on the vocabulary size (default 30000)
model_type: SentencePiece model type to train, e.g. "unigram" (default "unigram")
char_coverage: amount of characters covered by the model; if NULL the SentencePiece default is used (default NULL)
cache_dir: directory in which the trained SentencePiece model is cached (default "tmp")
Value: None (the constructed tokenizer object is used for its side effects / passed to other fastai functions).