cerebras.modelzoo.data_preparation.data_preprocessing.data_dedup.to_hash.get_documents

cerebras.modelzoo.data_preparation.data_preprocessing.data_dedup.to_hash.get_documents(input_dir, jsonl_key, format, threshold, job_id, n_jobs)