claf.data.dataset package

Submodules

class claf.data.dataset.base.DatasetBase[source]

Bases: torch.utils.data.dataset.Dataset

Dataset Base Model. An abstract class representing a Dataset.

collate_fn(cuda_device_id)[source]
get_ground_truth()[source]
get_ground_truths(data_idxs)[source]
get_predict()[source]
lazy_evaluation(index)[source]
class claf.data.dataset.seq_cls.SeqClsDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Sequence Classification

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_class_text_with_idx(class_index)[source]
get_ground_truth(data_id)[source]
get_id(data_index)[source]
property num_classes
property sequence_maxlen
class claf.data.dataset.squad.SQuADDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

SQuAD Dataset

Compatible with SQuAD v1.1 and v2.0.

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

property context_maxlen
get_context(data_index)[source]
get_ground_truths(data_index)[source]
get_predict(data_index, start, end)[source]
get_qid(data_index)[source]
get_text_span(data_index)[source]
get_text_with_index(data_index, start, end)[source]
property question_maxlen
class claf.data.dataset.wikisql.WikiSQLDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

WikiSQL Dataset

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_ground_truth(data_index)[source]
get_id(data_index)[source]
get_table_id(data_index)[source]
get_tokenized_question(data_index)[source]
property question_maxlen

Module contents

class claf.data.dataset.MultiTaskBertDataset(batches, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Multi-Task GLUE using BERT

  • Args:

    batches: Batch DTOs (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]
init_iterators()[source]
class claf.data.dataset.RegressionBertDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Regression using BERT

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_ground_truth(data_id)[source]
get_id(data_index)[source]
property sequence_maxlen
class claf.data.dataset.SeqClsDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Sequence Classification

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_class_text_with_idx(class_index)[source]
get_ground_truth(data_id)[source]
get_id(data_index)[source]
property num_classes
property sequence_maxlen
class claf.data.dataset.SeqClsBertDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Sequence Classification using BERT

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_class_text_with_idx(class_index)[source]
get_ground_truth(data_id)[source]
get_id(data_index)[source]
property num_classes
property sequence_maxlen
class claf.data.dataset.SQuADDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

SQuAD Dataset

Compatible with SQuAD v1.1 and v2.0.

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

property context_maxlen
get_context(data_index)[source]
get_ground_truths(data_index)[source]
get_predict(data_index, start, end)[source]
get_qid(data_index)[source]
get_text_span(data_index)[source]
get_text_with_index(data_index, start, end)[source]
property question_maxlen
class claf.data.dataset.SQuADBertDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

SQuAD Dataset for BERT

Compatible with SQuAD v1.1 and v2.0.

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

property bert_input_maxlen
collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_bert_tokens(data_index)[source]
get_context(data_index)[source]
get_ground_truths(data_index)[source]
get_id(data_index)[source]
get_predict(data_index, start, end)[source]
get_qid(data_index)[source]
get_qid_index(data_index)[source]
get_text_with_index(data_index, start, end)[source]
class claf.data.dataset.TokClsBertDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

Dataset for Token Classification using BERT

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_ground_truth(data_id)[source]
get_id(data_index)[source]
get_tag_text_with_idx(tag_index)[source]
get_tag_texts_with_idxs(tag_idxs)[source]
property num_tags
property sequence_maxlen
class claf.data.dataset.WikiSQLDataset(batch, vocab, helper=None)[source]

Bases: claf.data.dataset.base.DatasetBase

WikiSQL Dataset

  • Args:

    batch: Batch DTO (claf.data.batch)

  • Kwargs:

    helper: helper from data_reader

collate_fn(cuda_device_id=None)[source]

collate: indexed features and labels -> tensor

get_ground_truth(data_index)[source]
get_id(data_index)[source]
get_table_id(data_index)[source]
get_tokenized_question(data_index)[source]
property question_maxlen