Tokenize the texts in `df[text_cols]` in parallel using `n_workers` workers.

tokenize_df(
  df,
  text_cols,
  n_workers = 6,
  rules = NULL,
  mark_fields = NULL,
  tok = NULL,
  tok_text_col = "text"
)

Arguments

df

data frame

text_cols

names of the text columns in `df` to tokenize

n_workers

number of parallel workers used for tokenization (default 6)

rules

preprocessing rules applied to the texts before tokenization (default `NULL`)

mark_fields

whether to mark each text field with a field token when several text columns are combined (default `NULL`)

tok

tokenizer to use (default `NULL`)

tok_text_col

name of the column that receives the tokenized text (default "text")

Value

None