Source code for claf.tokens.token_maker

class TokenMaker:
    """
    Token Maker (Data Transfer Object)

    Token Maker consists of Tokenizer, Indexer, Embedding and Vocab

    * Args:
        token_type: name of the token type, stored as ``type_name``
            (see the ``*_TYPE`` constants below)

    * Kwargs:
        tokenizer: Tokenizer (claf.tokens.tokenizer.base)
        indexer: TokenIndexer (claf.tokens.indexer.base)
        embedding_fn: wrapper function of TokenEmbedding (claf.tokens.embedding.base)
        vocab_config: config dict of Vocab (claf.tokens.vocabulary)
    """

    # Token Type List
    FEATURE_TYPE = "feature"  # Do not use embedding, pass indexed_feature

    BERT_TYPE = "bert"
    CHAR_TYPE = "char"
    COVE_TYPE = "cove"
    ELMO_TYPE = "elmo"
    EXACT_MATCH_TYPE = "exact_match"
    WORD_TYPE = "word"
    FREQUENT_WORD_TYPE = "frequent_word"
    LINGUISTIC_TYPE = "linguistic"

    def __init__(
        self, token_type, tokenizer=None, indexer=None, embedding_fn=None, vocab_config=None
    ):
        """Store the token type and its (optional) components."""
        self.type_name = token_type
        self._tokenizer = tokenizer
        self._indexer = indexer
        self._embedding_fn = embedding_fn
        self._vocab_config = vocab_config
        # The vocab is attached later (via ``set_vocab`` or the ``vocab``
        # setter). Start from None so that reading ``vocab`` before then
        # returns None instead of raising AttributeError.
        self._vocab = None

    @property
    def tokenizer(self):
        """Tokenizer component (None if not set)."""
        return self._tokenizer

    @tokenizer.setter
    def tokenizer(self, tokenizer):
        self._tokenizer = tokenizer

    @property
    def indexer(self):
        """Indexer component (None if not set)."""
        return self._indexer

    @indexer.setter
    def indexer(self, indexer):
        self._indexer = indexer

    @property
    def embedding_fn(self):
        """Embedding wrapper function (None if not set)."""
        return self._embedding_fn

    @embedding_fn.setter
    def embedding_fn(self, embedding_fn):
        self._embedding_fn = embedding_fn

    @property
    def vocab_config(self):
        """Config dict of Vocab (None if not set)."""
        return self._vocab_config

    @vocab_config.setter
    def vocab_config(self, vocab_config):
        self._vocab_config = vocab_config

    @property
    def vocab(self):
        """Vocab attached to this token maker (None until one is set)."""
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        # Only stores the vocab; unlike ``set_vocab`` the indexer is not updated.
        self._vocab = vocab

    def set_vocab(self, vocab):
        """
        Attach ``vocab`` to both the indexer and this token maker.

        The indexer is updated first through its own ``set_vocab``; an
        indexer must therefore already be set when this is called.
        """
        self._indexer.set_vocab(vocab)
        self._vocab = vocab