日常需要传递几G或者十几G的fastq数据,在cp或者rsync完成后,需要对复制的数据进行md5码检验。本文先获取源文件的md5码,再获取目标文件的md5码,最后检验两者是否一致,从而判断复制是否完整。更多知识分享请到 https://zouhua.top/。
获取md5码
md5sum RawData/filename > RawData_md5sum.tsv
md5sum Rename/filename > Rename_md5sum.tsv
主程序
#!/usr/bin/python
"""Check copied files by comparing two ``md5sum`` listings.

Each input file holds ``md5sum`` output: one ``<md5> <filename>`` record per
line. Every record of the second listing is looked up by md5 in the first
listing and written to the output file as a tab-separated row:

* ``<file2 name> <file1 name> <md5> Correct`` when the md5 is present in
  the first listing;
* ``<file2 name> <md5> Wrong`` when it is not.
"""
import sys
import re
import os
import argparse as ap


def parse_argument(args):
    """Parse the command-line options.

    Args:
        args: argv-style list whose first element is the program name
            (normally ``sys.argv``).

    Returns:
        The ``argparse.Namespace`` carrying ``file1``, ``file2`` and ``out``.
    """
    parser = ap.ArgumentParser(description='check')
    # All three paths are mandatory: without them the script would only fail
    # later with an obscure ``open(None)`` error instead of a usage message.
    parser.add_argument('-f1', '--file1', metavar='<file1>', type=str,
                        required=True)
    parser.add_argument('-f2', '--file2', metavar='<file2>', type=str,
                        required=True)
    parser.add_argument('-o', '--out', metavar='<out>', type=str,
                        required=True)
    # Parse the list that was handed in (minus the program name) so the
    # ``args`` parameter is actually honoured.
    return parser.parse_args(args[1:])


def _read_md5_records(path):
    """Yield ``(md5, filename)`` pairs from the md5sum-style listing at *path*."""
    with open(path, 'r') as handle:
        for raw in handle:
            # Split only on the first whitespace run so that a filename
            # containing spaces stays in one piece.
            fields = raw.strip().split(None, 1)
            if len(fields) < 2:
                # Blank or malformed line: nothing to compare.
                continue
            yield fields[0], fields[1]


def main():
    """Compare the two md5 listings named on the command line.

    Reads ``--file1`` into an md5 -> filename table, then writes one result
    row per record of ``--file2`` to ``--out``.
    """
    args = parse_argument(sys.argv)

    # When several files share one md5, the last filename listed wins.
    source_by_md5 = dict(_read_md5_records(args.file1))

    with open(args.out, 'w') as out_f:
        for md5, name in _read_md5_records(args.file2):
            if md5 in source_by_md5:
                row = [name, source_by_md5[md5], md5, "Correct"]
            else:
                # Report the md5 of the mismatching file itself.
                row = [name, md5, "Wrong"]
            out_f.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main()
运行
python check_md5.py -f1 RawData_md5sum.tsv -f2 Rename_md5sum.tsv -o Checkout_md5.tsv
