cerebras.modelzoo.data_preparation.nlp.slimpajama.dedup.to_hash.get_documents

cerebras.modelzoo.data_preparation.nlp.slimpajama.dedup.to_hash.get_documents(input_dir, index_start, index_end, output_dir, dataset_name)