cerebras.modelzoo.data_preparation.nlp.pubmed.TextSharding.Sharding

class cerebras.modelzoo.data_preparation.nlp.pubmed.TextSharding.Sharding(input_files, output_name_prefix, n_training_shards, n_test_shards, fraction_test_set)

Bases: object

Methods

distribute_articles_over_shards

get_sentences_per_shard

init_output_files

load_articles

segment_articles_into_sentences

write_shards_to_disk

write_single_shard